int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GForward (dsq, L, gm, gx1, &fsc); p7_GBackward(dsq, L, gm, gx2, &bsc); p7_GDecoding(gm, gx1, gx2, gx2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_GNull2_ByExpectation(gm, gx2, null2); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); char *hmmfile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); int i; /* Read one HMM from <hmmfile> */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); bg = p7_bg_Create(abc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_bg_SetFilterByHMM(bg, hmm); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); char *hmmfile = esl_opt_GetArg(go, 1); P7_HMMCACHE *hcache = NULL; char errbuf[eslERRBUFSIZE]; size_t tot_mem; int status; esl_stopwatch_Start(w); status = p7_hmmcache_Open(hmmfile, &hcache, errbuf); if (status == eslENOTFOUND) p7_Fail("Failed to read %s\n %s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("Failed to parse %s\n %s\n", hmmfile, errbuf); else if (status == eslEINCOMPAT) p7_Fail("Mixed profile types in %s\n %s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Failed to cache %s: error code %d\n", hmmfile, status); p7_hmmcache_SetNumericNames(hcache); tot_mem = p7_hmmcache_Sizeof(hcache); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("models = %d\n", hcache->n); printf("tot memory = %" PRIu64 "\n", (uint64_t) tot_mem); p7_hmmcache_Close(hcache); esl_getopts_Destroy(go); esl_stopwatch_Destroy(w); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); int N = esl_opt_GetInteger(go, "-N"); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); esl_stopwatch_Start(w); while (N--) { /* cfg rng bg gm om */ p7_Calibrate(hmm, NULL, NULL, NULL, NULL, NULL); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { char *hmmfile = argv[1]; /* name of HMM file to read one HMM from */ ESL_ALPHABET *abc = NULL; /* sequence alphabet */ ESL_RANDOMNESS *r = NULL; /* source of randomness */ P7_HMMFILE *hfp = NULL; /* open hmmfile */ P7_HMM *hmm = NULL; /* HMM to emit from */ P7_PROFILE *gm = NULL; /* profile HMM (scores) */ P7_BG *bg = NULL; /* null model */ P7_TRACE *tr = NULL; /* sampled trace */ ESL_SQ *sq = NULL; /* sampled digital sequence */ int n = 1000; int counts[p7T_NSTATETYPES]; int i; float sc; float nullsc; double bitscore; r = esl_randomness_CreateFast(0); tr = p7_trace_Create(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("failed to open %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("failed to read HMM"); sq = esl_sq_CreateDigital(abc); bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); for (i = 0; i < n; i++) { p7_ProfileEmit(r, hmm, gm, bg, sq, tr); p7_trace_GetStateUseCounts(tr, counts); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_trace_Score(tr, sq->dsq, gm, &sc); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); bitscore = (sc - nullsc)/ eslCONST_LOG2; printf("%d %8.4f\n", counts[p7T_M] + (counts[p7T_I] + counts[p7T_D])/2, bitscore); } p7_profile_Destroy(gm); esl_sq_Destroy(sq); p7_trace_Destroy(tr); esl_randomness_Destroy(r); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); p7_hmm_Destroy(hmm); return eslOK; }
/* Create an SSI index file for open HMM file <hfp>. * Both name and accession of HMMs are stored as keys. */ static void create_ssi_index(ESL_GETOPTS *go, P7_HMMFILE *hfp) { ESL_NEWSSI *ns = NULL; ESL_ALPHABET *abc = NULL; P7_HMM *hmm = NULL; int nhmm = 0; char *ssifile = NULL; uint16_t fh; int status; if (esl_sprintf(&ssifile, "%s.ssi", hfp->fname) != eslOK) p7_Die("esl_sprintf() failed"); status = esl_newssi_Open(ssifile, FALSE, &ns); if (status == eslENOTFOUND) esl_fatal("failed to open SSI index %s", ssifile); else if (status == eslEOVERWRITE) esl_fatal("SSI index %s already exists; delete or rename it", ssifile); else if (status != eslOK) esl_fatal("failed to create a new SSI index"); if (esl_newssi_AddFile(ns, hfp->fname, 0, &fh) != eslOK) /* 0 = format code (HMMs don't have any yet) */ esl_fatal("Failed to add HMM file %s to new SSI index\n", hfp->fname); printf("Working... "); fflush(stdout); while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname); else if (status == eslEFORMAT) p7_Fail("bad file format in HMM file %s", hfp->fname); else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", hfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s", hfp->fname); nhmm++; if (hmm->name == NULL) p7_Fail("Every HMM must have a name to be indexed. Failed to find name of HMM #%d\n", nhmm); if (esl_newssi_AddKey(ns, hmm->name, fh, hmm->offset, 0, 0) != eslOK) p7_Fail("Failed to add key %s to SSI index", hmm->name); if (hmm->acc) { if (esl_newssi_AddAlias(ns, hmm->acc, hmm->name) != eslOK) p7_Fail("Failed to add secondary key %s to SSI index", hmm->acc); } p7_hmm_Destroy(hmm); } if (esl_newssi_Write(ns) != eslOK) p7_Fail("Failed to write keys to ssi file %s\n", ssifile); printf("done.\n"); if (ns->nsecondary > 0) printf("Indexed %d HMMs (%ld names and %ld accessions).\n", nhmm, (long) ns->nprimary, (long) ns->nsecondary); else printf("Indexed %d HMMs (%ld names).\n", nhmm, (long) ns->nprimary); printf("SSI index written to file %s\n", ssifile); free(ssifile); esl_alphabet_Destroy(abc); esl_newssi_Close(ns); return; }
static int serial_loop(WORKER_INFO *info, struct cfg_s *cfg) { P7_BUILDER *bld = NULL; ESL_MSA *msa = NULL; ESL_MSA *postmsa = NULL; ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL; P7_HMM *hmm = NULL; char errmsg[eslERRBUFSIZE]; int status; double entropy; cfg->nali = 0; while ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) { cfg->nali++; if ((status = set_msa_name(cfg, errmsg, msa)) != eslOK) p7_Fail("%s\n", errmsg); /* cfg->nnamed gets incremented in this call */ /* bg new-HMM trarr gm om */ if ((status = p7_Builder(info->bld, msa, info->bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) p7_Fail("build failed: %s", bld->errbuf); entropy = p7_MeanMatchRelativeEntropy(hmm, info->bg); if ((status = output_result(cfg, errmsg, cfg->nali, msa, hmm, postmsa, entropy)) != eslOK) p7_Fail(errmsg); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msa_Destroy(postmsa); } return status; }
/* mpi_worker() * The main control for an MPI worker process. */ static void mpi_worker(ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; int status; P7_HMM *hmm = NULL; char *wbuf = NULL; double *xv = NULL; /* result: array of N scores */ int *av = NULL; /* optional result: array of N alignment lengths */ int wn = 0; char errbuf[eslERRBUFSIZE]; int pos; /* Worker initializes */ if ((status = minimum_mpi_working_buffer(go, cfg->N, &wn)) != eslOK) xstatus = status; ESL_ALLOC(wbuf, wn * sizeof(char)); ESL_ALLOC(xv, cfg->N * sizeof(double) + 2); if (esl_opt_GetBoolean(go, "-a")) ESL_ALLOC(av, cfg->N * sizeof(int)); /* Main worker loop */ while (p7_hmm_mpi_Recv(0, 0, MPI_COMM_WORLD, &wbuf, &wn, &(cfg->abc), &hmm) == eslOK) { if (esl_opt_GetBoolean(go, "--recal")) { if (( status = recalibrate_model(go, cfg, errbuf, hmm)) != eslOK) goto CLEANERROR; } if ((status = process_workunit(go, cfg, errbuf, hmm, xv, av)) != eslOK) goto CLEANERROR; pos = 0; MPI_Pack(&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Pack(xv, cfg->N, MPI_DOUBLE, wbuf, wn, &pos, MPI_COMM_WORLD); if (esl_opt_GetBoolean(go, "-a")) MPI_Pack(av, cfg->N, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); p7_hmm_Destroy(hmm); } free(wbuf); free(xv); if (av != NULL) free(av); return; CLEANERROR: pos = 0; MPI_Pack(&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Pack(errbuf, eslERRBUFSIZE, MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); if (wbuf != NULL) free(wbuf); if (hmm != NULL) p7_hmm_Destroy(hmm); if (xv != NULL) free(xv); if (av != NULL) free(av); return; ERROR: p7_Fail("Allocation error in mpi_worker"); }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); char *hmmfile = esl_opt_GetArg(go, 1); int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_TRACE *tr = p7_trace_Create(); ESL_SQ *sq = NULL; char errbuf[eslERRBUFSIZE]; int i; int status; status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); status = p7_hmmfile_Read(hfp, &abc, &hmm); if (status == eslEFORMAT) p7_Fail("Bad file format in HMM file %s:\n%s\n", hfp->fname, hfp->errbuf); else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type)); else if (status == eslEOF) p7_Fail("Empty HMM file %s? No HMM data found.\n", hfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s\n", hfp->fname); p7_hmmfile_Close(hfp); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); sq = esl_sq_CreateDigital(abc); for (i = 0; i < N; i++) { p7_ProfileEmit(rng, hmm, gm, bg, sq, tr); esl_sq_FormatName(sq, "%s-sample%d", hmm->name, i); esl_sqio_Write(stdout, sq, eslSQFILE_FASTA, FALSE); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) esl_fatal(errbuf); esl_sq_Reuse(sq); p7_trace_Reuse(tr); } esl_sq_Destroy(sq); p7_trace_Destroy(tr); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); int i; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_oprofile_Convert(gm, om); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; struct cfg_s cfg; int status = eslOK; impl_Init(); /* processor specific initialization */ p7_FLogsumInit(); /* we're going to use table-driven Logsum() approximations at times */ /* Initialize what we can in the config structure (without knowing the alphabet yet) */ cfg.hmmfile = NULL; cfg.dbfile = NULL; cfg.do_mpi = FALSE; /* this gets reset below, if we init MPI */ cfg.nproc = 0; /* this gets reset below, if we init MPI */ cfg.my_rank = 0; /* this gets reset below, if we init MPI */ cfg.firstseq_key = NULL; cfg.n_targetseq = -1; process_commandline(argc, argv, &go, &cfg.hmmfile, &cfg.dbfile); /* is the range restricted? */ if (esl_opt_IsUsed(go, "--restrictdb_stkey") ) if ((cfg.firstseq_key = esl_opt_GetString(go, "--restrictdb_stkey")) == NULL) p7_Fail("Failure capturing --restrictdb_stkey\n"); if (esl_opt_IsUsed(go, "--restrictdb_n") ) cfg.n_targetseq = esl_opt_GetInteger(go, "--restrictdb_n"); if ( cfg.n_targetseq != -1 && cfg.n_targetseq < 1 ) p7_Fail("--restrictdb_n must be >= 1\n"); { status = serial_master(go, &cfg); } esl_getopts_Destroy(go); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; float ftol = 1e-4; /* floating-point tolerance for checking parameters against expected probs or summing to 1 */ char errbuf[eslERRBUFSIZE]; /* Read in one HMM; sets alphabet to the HMM's alphabet */ if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); /* Set up a null model */ bg = p7_bg_Create(abc); /* Allocate and configure a profile from HMM and null model */ gm = p7_profile_Create(hmm->M, abc); p7_profile_Config(gm, hmm, bg); p7_profile_SetLength(gm, 400); /* 400 is arbitrary here; this is whatever your target seq length L is */ printf("profile memory consumed: %" PRId64 " bytes\n", (int64_t) p7_profile_Sizeof(gm)); /* Debugging tools allow dumping, validating the object */ if (p7_profile_Validate(gm, errbuf, ftol) != eslOK) p7_Fail("profile validation failed\n %s\n", errbuf); if (esl_opt_GetBoolean(go, "--vv")) p7_profile_Dump(stdout, gm); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
static void serial_master(ESL_GETOPTS *go, struct cfg_s *cfg) { P7_HMM *hmm = NULL; double *xv = NULL; /* results: array of N scores */ int *av = NULL; /* optional results: array of N alignment lengths */ char errbuf[eslERRBUFSIZE]; int status; if ((status = init_master_cfg(go, cfg, errbuf)) != eslOK) p7_Fail(errbuf); if ((xv = malloc(sizeof(double) * cfg->N)) == NULL) p7_Fail("allocation failed"); if (esl_opt_GetBoolean(go, "-a") && (av = malloc(sizeof(int) * cfg->N)) == NULL) p7_Fail("allocation failed"); while ((status = p7_hmmfile_Read(cfg->hfp, &(cfg->abc), &hmm)) != eslEOF) { if (status == eslEOD) p7_Fail("read failed, HMM file %s may be truncated?", cfg->hmmfile); else if (status == eslEFORMAT) p7_Fail("bad file format in HMM file %s", cfg->hmmfile); else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", cfg->hmmfile); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s", cfg->hmmfile); if (cfg->bg == NULL) { if (esl_opt_GetBoolean(go, "--bgflat")) cfg->bg = p7_bg_CreateUniform(cfg->abc); else cfg->bg = p7_bg_Create(cfg->abc); p7_bg_SetLength(cfg->bg, esl_opt_GetInteger(go, "-L")); /* set the null model background length in both master and workers. */ } if (esl_opt_GetBoolean(go, "--recal")) { if (recalibrate_model(go, cfg, errbuf, hmm) != eslOK) p7_Fail(errbuf); } if (process_workunit(go, cfg, errbuf, hmm, xv, av) != eslOK) p7_Fail(errbuf); if (output_result (go, cfg, errbuf, hmm, xv, av) != eslOK) p7_Fail(errbuf); p7_hmm_Destroy(hmm); } free(xv); if (av != NULL) free(av); }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_ALPHABET *abc = NULL; char *msvfile = esl_opt_GetArg(go, 1); FILE *msvfp = NULL; P7_OPROFILE *om = NULL; int nmodel = 0; uint64_t totM = 0; int status; esl_stopwatch_Start(w); if ((msvfp = fopen(msvfile, "r")) == NULL) p7_Fail("Failed to open MSV file %s for reading.\n", msvfile); while ((status = p7_oprofile_ReadMSV(msvfp, &abc, NULL, &om)) == eslOK) { nmodel++; totM += om->M; p7_oprofile_Destroy(om); } if (status == eslEFORMAT) p7_Fail("bad file format in profile file %s", msvfile); else if (status == eslEINCOMPAT) p7_Fail("profile file %s contains different alphabets", msvfile); else if (status != eslEOF) p7_Fail("Unexpected error in reading profiles from %s", msvfile); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# number of models: %d\n", nmodel); printf("# total M: %" PRId64 "\n", totM); fclose(msvfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
/* multifetch: * given a file containing lines with one name or key per line; * parse the file line-by-line; * if we have an SSI index available, retrieve the HMMs by key * as we see each line; * else, without an SSI index, store the keys in a hash, then * read the entire HMM file in a single pass, outputting HMMs * that are in our keylist. * * Note that with an SSI index, you get the HMMs in the order they * appear in the <keyfile>, but without an SSI index, you get HMMs in * the order they occur in the HMM file. */ static void multifetch(ESL_GETOPTS *go, FILE *ofp, char *keyfile, P7_HMMFILE *hfp) { ESL_KEYHASH *keys = esl_keyhash_Create(); ESL_FILEPARSER *efp = NULL; ESL_ALPHABET *abc = NULL; P7_HMM *hmm = NULL; int nhmm = 0; char *key; int keylen; int keyidx; int status; if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK) p7_Fail("Failed to open key file %s\n", keyfile); esl_fileparser_SetCommentChar(efp, '#'); while (esl_fileparser_NextLine(efp) == eslOK) { if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK) p7_Fail("Failed to read HMM name on line %d of file %s\n", efp->linenumber, keyfile); status = esl_key_Store(keys, key, &keyidx); if (status == eslEDUP) p7_Fail("HMM key %s occurs more than once in file %s\n", key, keyfile); if (hfp->ssi != NULL) { onefetch(go, ofp, key, hfp); nhmm++; } } if (hfp->ssi == NULL) { while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname); else if (status == eslEFORMAT) p7_Fail("bad file format in HMM file %s", hfp->fname); else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", hfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s", hfp->fname); if (esl_key_Lookup(keys, hmm->name, &keyidx) == eslOK || ((hmm->acc) && esl_key_Lookup(keys, hmm->acc, &keyidx) == eslOK)) { p7_hmmfile_WriteASCII(ofp, -1, hmm); nhmm++; } p7_hmm_Destroy(hmm); } } if (ofp != stdout) printf("\nRetrieved %d HMMs.\n", nhmm); if (abc != NULL) esl_alphabet_Destroy(abc); esl_keyhash_Destroy(keys); esl_fileparser_Close(efp); return; }
int main(int argc, char **argv) { char *hmmfile = argv[1]; ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om1 = NULL; P7_OPROFILE *om2 = NULL; int status; char errbuf[eslERRBUFSIZE]; status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); status = p7_hmmfile_Read(hfp, &abc, &hmm); if (status == eslEFORMAT) p7_Fail("Bad file format in HMM file %s:\n%s\n", hfp->fname, hfp->errbuf); else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type)); else if (status == eslEOF) p7_Fail("Empty HMM file %s? No HMM data found.\n", hfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s\n", hfp->fname); bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); om1 = p7_oprofile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, 400, p7_LOCAL); p7_oprofile_Convert(gm, om1); om2 = p7_oprofile_Copy(om1); if (p7_oprofile_Compare(om1, om2, 0.001f, errbuf) != eslOK) p7_Fail("Compare failed %s\n", errbuf); p7_oprofile_Destroy(om1); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); return eslOK; }
static void pipeline_thread(void *arg) { int workeridx; int status; WORK_ITEM *item; void *newItem; WORKER_INFO *info; ESL_THREADS *obj; obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); status = esl_workqueue_WorkerUpdate(info->queue, NULL, &newItem); if (status != eslOK) esl_fatal("Work queue worker failed"); /* loop until all blocks have been processed */ item = (WORK_ITEM *) newItem; while (item->msa != NULL) { status = p7_Builder(info->bld, item->msa, info->bg, &item->hmm, NULL, NULL, NULL, &item->postmsa); if (status != eslOK) p7_Fail("build failed: %s", info->bld->errbuf); item->entropy = p7_MeanMatchRelativeEntropy(item->hmm, info->bg); item->processed = TRUE; status = esl_workqueue_WorkerUpdate(info->queue, item, &newItem); if (status != eslOK) esl_fatal("Work queue worker failed"); item = (WORK_ITEM *) newItem; } status = esl_workqueue_WorkerUpdate(info->queue, item, NULL); if (status != eslOK) esl_fatal("Work queue worker failed"); esl_threads_Finished(obj, workeridx); return; }
/* Function: p7_Alimask_MakeModel2AliMap() * Synopsis: Compute map of coordinate in the alignment corresponding to each model position. * * Args: msa - The alignment for which the mapped model is to be computed. We assume * the MSA has already been manipulated to account for model building * flags (e.g. weighting). * do_hand - TRUE when the model is to follow a hand-build RF line (which must be * part of the file. * symfraq - if weighted occupancy exceeds this value, include the column in the model. * map - int array into which the map values will be stored. Calling function * must allocate (msa->alen+1) ints. * * Returns: The number of mapped model positions. */ int p7_Alimask_MakeModel2AliMap(ESL_MSA *msa, int do_hand, float symfrac, int *map ) { int i = 0; int apos, idx; float r; /* weighted residue count */ float totwgt; /* weighted residue+gap count */ i = 0; if ( do_hand ) { if (msa->rf == NULL) p7_Fail("Model file does not contain an RF line, required for --hand.\n"); /* Watch for off-by-one. rf is [0..alen-1]*/ for (apos = 1; apos <= msa->alen; apos++) { if (!esl_abc_CIsGap(msa->abc, msa->rf[apos-1]) ) { map[i] = apos; i++; } } } else { for (apos = 1; apos <= msa->alen; apos++) { r = totwgt = 0.; for (idx = 0; idx < msa->nseq; idx++) { if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos])) { r += msa->wgt[idx]; totwgt += msa->wgt[idx]; } else if (esl_abc_XIsGap(msa->abc, msa->ax[idx][apos])) { totwgt += msa->wgt[idx]; } else if (esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos])) continue; } if (r > 0. && r / totwgt >= symfrac) { map[i] = apos; i++; } } } return i; }
/* onefetch(): * Given one <key> (an HMM name or accession), retrieve the corresponding HMM. * In SSI mode, we can do this quickly by positioning the file, then reading * and writing the HMM that's at that position. * Without an SSI index, we have to parse the HMMs sequentially 'til we find * the one we're after. */ static void onefetch(ESL_GETOPTS *go, FILE *ofp, char *key, P7_HMMFILE *hfp) { ESL_ALPHABET *abc = NULL; P7_HMM *hmm = NULL; int status; if (hfp->ssi != NULL) { status = p7_hmmfile_PositionByKey(hfp, key); if (status == eslENOTFOUND) p7_Fail("HMM %s not found in SSI index for file %s\n", key, hfp->fname); else if (status == eslEFORMAT) p7_Fail("Failed to parse SSI index for %s\n", hfp->fname); else if (status != eslOK) p7_Fail("Failed to look up location of HMM %s in SSI index of file %s\n", key, hfp->fname); } while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname); else if (status == eslEFORMAT) p7_Fail("bad file format in HMM file %s", hfp->fname); else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", hfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s", hfp->fname); if (strcmp(key, hmm->name) == 0 || (hmm->acc && strcmp(key, hmm->acc) == 0)) break; p7_hmm_Destroy(hmm); hmm = NULL; } if (status == eslOK) { p7_hmmfile_WriteASCII(ofp, -1, hmm); p7_hmm_Destroy(hmm); } else p7_Fail("HMM %s not found in file %s\n", key, hfp->fname); esl_alphabet_Destroy(abc); }
/* recalibrate_model() * * Optionally, user can choose (with --recal) to replace the * statistical parameters of the HMM. The calibrated params are used * to generate "expected" distributions in output plots. see * p7_Calibrate(), which this is a partial copy of. (p7_Calibrate() * requires a P7_BUILDER object that we don't have.) * * On success, returns <eslOK> and the statistical parameters of the * <hmm> have been recalibrated and replaced. */ static int recalibrate_model(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm) { P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; int EmL = esl_opt_GetInteger(go, "--EmL"); int EmN = esl_opt_GetInteger(go, "--EmN"); int EvL = esl_opt_GetInteger(go, "--EvL"); int EvN = esl_opt_GetInteger(go, "--EvN"); int EfL = esl_opt_GetInteger(go, "--EfL"); int EfN = esl_opt_GetInteger(go, "--EfN"); double Eft = esl_opt_GetReal (go, "--Eft"); double lambda, mmu, vmu, ftau; gm = p7_profile_Create(hmm->M, cfg->abc); p7_profile_Config(gm, hmm, cfg->bg); /* dual-mode multihit; L=0 (no length model needed right now) */ P7_HARDWARE *hw; if ((hw = p7_hardware_Create ()) == NULL) p7_Fail("Couldn't get HW information data structure"); om = p7_oprofile_Create(gm->M, cfg->abc, hw->simd); p7_oprofile_Convert(gm, om); /* om is now *local* multihit */ p7_Lambda(hmm, cfg->bg, &lambda); p7_MSVMu (cfg->r, om, cfg->bg, EmL, EmN, lambda, &mmu); p7_ViterbiMu(cfg->r, om, cfg->bg, EvL, EvN, lambda, &vmu); p7_Tau (cfg->r, om, cfg->bg, EfL, EfN, lambda, Eft, &ftau); hmm->evparam[p7_MLAMBDA] = lambda; hmm->evparam[p7_VLAMBDA] = lambda; hmm->evparam[p7_FLAMBDA] = lambda; hmm->evparam[p7_MMU] = mmu; hmm->evparam[p7_VMU] = vmu; hmm->evparam[p7_FTAU] = ftau; hmm->flags |= p7H_STATS; p7_profile_Destroy(gm); p7_oprofile_Destroy(om); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_ALPHABET *abc = NULL; char *hmmfile = esl_opt_GetArg(go, 1); P7_HMMFILE *hfp = NULL; P7_OPROFILE *om = NULL; int nmodel = 0; uint64_t totM = 0; int status; char errbuf[eslERRBUFSIZE]; esl_stopwatch_Start(w); status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); while ((status = p7_oprofile_ReadMSV(hfp, &abc, &om)) == eslOK) { nmodel++; totM += om->M; p7_oprofile_Destroy(om); } if (status == eslEFORMAT) p7_Fail("bad file format in profile file %s", hmmfile); else if (status == eslEINCOMPAT) p7_Fail("profile file %s contains different alphabets", hmmfile); else if (status != eslEOF) p7_Fail("Unexpected error in reading profiles from %s", hmmfile); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# number of models: %d\n", nmodel); printf("# total M: %" PRId64 "\n", totM); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS*r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET*abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm1, *gm2; int L = 2000; // esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); int MaxPart = esl_opt_GetInteger(go, "-M"); __m128 resdata[10]; float *sc1 = (float*) (resdata+0); // uses 1 __m128s ESL_DSQ *dsq = NULL; int i, j; ESL_SQFILE *sqfp = NULL; DATA_STREAM *dstream; struct timeb tbstart, tbend; int sumlengths = 0; if (p7_hmmfile_Open(hmmfile, NULL, &hfp)!= eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm1 = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm1, L, p7_UNILOCAL); p7_ProfileConfig(hmm, bg, gm2, L, p7_UNILOCAL); dstream = p7_ViterbiStream_Create(gm1); p7_ViterbiStream_Setup(dstream, L+100, MaxPart); // use max L dstream->L = L; // No. of partitions computed without full parallelism ( == no. of threads active while some are idle) int Niters_part = dstream->Npartitions % NTHREADS; // No. of Model lines that could be computed but are wasted by idle threads waiting on the end int Nwasted_threads = dstream->partition * ((NTHREADS-Niters_part) % NTHREADS); // No. of lines in the last partition that go beyond M. It's wasted comp time by a single thread int Nwasted_leftover= (dstream->partition - gm1->M % dstream->partition) % dstream->partition; // Total number of wasted lines int wastedcomp = Nwasted_threads + Nwasted_leftover; // Total number of lines computed and waited for int totalcomp = wastedcomp + gm1->M; // same as: roundtop(gm1->M, dstream->partition * NTHREADS); printf("Viterbi Stream Word with %d Threads, model %s: Modelsize %d, #Segms: %d, SeqL: %d, Nseqs %d, Part %d, #Parts %d\n", NTHREADS, hmmfile, gm1->M, (int) ceil(gm1->M/8.0), L, 8*N, dstream->partition, dstream->Npartitions); printf("Total Comp Lines: %d | Wasted Comp Lines: %d\n", totalcomp, wastedcomp); // for ViterbiFilter P7_OPROFILE *om = p7_oprofile_Create(hmm->M, gm1->abc); p7_oprofile_Convert(gm1, om); P7_OMX *ox = p7_omx_Create(hmm->M, 0, 0); dsq_cmp_t **seqsdb= calloc(8*N+64, sizeof(dsq_cmp_t*)); if(0) { ESL_SQ* sq = esl_sq_CreateDigital(abc); if (esl_sqfile_OpenDigital(abc, seqfile, eslSQFILE_FASTA, NULL, &sqfp) != eslOK) { p7_Fail("Failed to open sequence file\n"); return -1; } for (j = 0; j < 8*N; j++) { int res = esl_sqio_Read(sqfp, sq); if (res != eslOK) { printf("ATENCAO: faltam sequencias\n"); break; } int len = sq->n; dsq = sq->dsq; seqsdb[j] = malloc(sizeof(dsq_cmp_t)); seqsdb[j]->length = len; seqsdb[j]->seq = malloc((len+4)*sizeof(ESL_DSQ)); memcpy(seqsdb[j]->seq, dsq, len+2); sumlengths += len; esl_sq_Reuse(sq); } ftime(&tbstart); N = j/8; printf("N = %d\n", N); // Sort sequences by length qsort(seqsdb, N*8, sizeof(dsq_cmp_t*), compare_seqs); } else if(0) for (i = 0; i < N; i++) { for (j = 0; j < 8; j++) { int len = L - rand()%1000; seqsdb[i*8+j] = malloc(sizeof(dsq_cmp_t)); seqsdb[i*8+j]->seq = malloc(len+4); seqsdb[i*8+j]->length = len; esl_rsq_xfIID(r, bg->f, abc->K, len, seqsdb[i*8+j]->seq); sumlengths += len; } } // double sumerrors = 0; float* results = (float*) alloc_m128_aligned64(N*2+2); ftime(&tbstart); for (j = 0; j < N; j++) for (i = 0; i < N; i++) { // if (i % 10000 == 0) printf("START %d\n", i); p7_ViterbiStream(dstream, seqsdb+i*8, sc1); // memcpy(results+i*8, sc1, 32); } ftime(&tbend); double secs = TIMEDIFF(tbstart,tbend); // printf("Qsort time: %6.3f | Viterbi time: %6.3f\n", TIMEDIFF(tbqsort,tbstart), secs); w->elapsed = w->user = secs; esl_stopwatch_Display(stdout, w, "# Opt CPU time: "); printf("# %.0fM cells in %.1f Mc/s\n", (sumlengths * (double) gm1->M) / 1e6, (sumlengths * (double) gm1->M * 1e-6) / secs); if(0) // compare results against base version for (i = 0; i < 1000 && i < N; i++) { int maxll = 0; float sc2; for (j = 0; j < 8; j++) if (maxll < seqsdb[i*8+j]->length) maxll = seqsdb[i*8+j]->length; // for (j = 0; j < 8; j++) printf("%d ", seqsdb[i*8+j]->length); printf("\n"); // if (i % 10 == 0) printf("i %d\n", i); p7_oprofile_ReconfigRestLength(om, maxll); p7_ReconfigLength(gm2, maxll); // fazer Reconfig aqui para emular compl o VitStream for (j = 0; j < 8; j++) { // p7_ReconfigLength(gm2, seqsdb[i*8+j]->length); // p7_Viterbi_unilocal(seqsdb[i*8+j]->seq, seqsdb[i*8+j]->length, gm2, &sc3); // p7_Viterbi_unilocal_word(seqsdb[i*8+j]->seq, seqsdb[i*8+j]->length, gm2, &sc2); // p7_oprofile_ReconfigLength(om, seqsdb[i*8+j]->length); p7_ViterbiFilter(seqsdb[i*8+j]->seq, seqsdb[i*8+j]->length, om, ox, &sc2); //sumerrors += fabs(sc1[j]- sc2); if (fabs(results[i*8+j] - sc2) > 0.00001) { printf("%3d-%d L %4d: VS %f d %f\t| Base SerI %f\n", i, j, seqsdb[i*8+j]->length, results[i*8+j], fabs(results[i*8+j] - sc2), sc2); getc(stdin); } } } return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *fwd = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; P7_TRACE *tr = NULL; int format = eslSQFILE_UNKNOWN; char errbuf[eslERRBUFSIZE]; float sc; int d; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); esl_sqfile_Close(sqfp); /* Configure a profile from the HMM */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); /* Allocate matrix and a trace */ fwd = p7_gmx_Create(gm->M, sq->n); tr = p7_trace_Create(); /* Run Viterbi; do traceback */ p7_GViterbi (sq->dsq, sq->n, gm, fwd, &sc); p7_GTrace (sq->dsq, sq->n, gm, fwd, tr); /* Dump and validate the trace. */ p7_trace_Dump(stdout, tr, gm, sq->dsq); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf); /* Domain info in the trace. */ p7_trace_Index(tr); printf("# Viterbi: %d domains : ", tr->ndom); for (d = 0; d < tr->ndom; d++) printf("%6d %6d %6d %6d ", tr->sqfrom[d], tr->sqto[d], tr->hmmfrom[d], tr->hmmto[d]); printf("\n"); /* Cleanup */ p7_trace_Destroy(tr); p7_gmx_Destroy(fwd); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_sq_Destroy(sq); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc; double base_time, bench_time, Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); gx = p7_gmx_Create(gm->M, L); /* Baseline time. */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; /* Benchmark time. */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GViterbi (dsq, L, gm, gx, &sc); } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_gmx_Destroy(gx); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; int i, j; int status = eslOK; P7_HMMFILE *hfp = NULL; /* open input HMM file */ P7_HMM *hmm = NULL; /* one HMM query */ ESL_ALPHABET *abc = NULL; /* digital alphabet */ P7_BG *bg = NULL; char errbuf[eslERRBUFSIZE]; char* hmmfile; float *rel_ents = NULL; float **heights = NULL; float **probs = NULL; float *ins_P = NULL; float *ins_expL = NULL; float *occupancy = NULL; int mode = HMMLOGO_RELENT_ALL; //default go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal(argv[0], "Error in configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") ) { p7_banner (stdout, argv[0], banner); esl_usage (stdout, argv[0], usage); puts("\nOptions:"); esl_opt_DisplayHelp(stdout, go, 1, 2, 100); exit(0); } if (esl_opt_ArgNumber(go) != 1) esl_fatal(argv[0], "Incorrect number of command line arguments.\n"); hmmfile = esl_opt_GetArg(go, 1); if (esl_opt_IsOn(go, "--height_relent_all")) mode = HMMLOGO_RELENT_ALL; else if (esl_opt_IsOn(go, "--height_relent_abovebg")) mode = HMMLOGO_RELENT_ABOVEBG; else if (esl_opt_IsOn(go, "--height_score")) mode = HMMLOGO_SCORE; else mode = HMMLOGO_RELENT_ALL; //default /* Open the query profile HMM file */ status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); status = p7_hmmfile_Read(hfp, &abc, &hmm); bg = p7_bg_Create(abc); ESL_ALLOC(rel_ents, (hmm->M+1) * sizeof(float)); ESL_ALLOC(heights, (hmm->M+1) * sizeof(float*)); ESL_ALLOC(probs, (hmm->M+1) * sizeof(float*)); for (i = 1; i <= hmm->M; i++) { ESL_ALLOC(heights[i], abc->K * sizeof(float)); ESL_ALLOC(probs[i], abc->K * sizeof(float)); } /* residue heights */ if (mode == HMMLOGO_RELENT_ALL) { printf ("max expected height = %.2f\n", hmmlogo_maxHeight(bg) ); hmmlogo_RelativeEntropy_all(hmm, bg, rel_ents, probs, heights); } else if (mode == HMMLOGO_RELENT_ABOVEBG) { printf ("max expected height = %.2f\n", hmmlogo_maxHeight(bg) ); hmmlogo_RelativeEntropy_above_bg(hmm, bg, rel_ents, probs, heights); } else if (mode == HMMLOGO_SCORE) { hmmlogo_ScoreHeights(hmm, bg, heights ); } printf ("Residue heights\n"); for (i = 1; i <= hmm->M; i++) { printf("%d: ", i); for (j=0; j<abc->K; j++) printf("%6.3f ", heights[i][j] ); if (mode != HMMLOGO_SCORE) printf(" (%6.3f)", rel_ents[i]); printf("\n"); } if (rel_ents != NULL) free(rel_ents); if (heights != NULL) { for (i = 1; i <= hmm->M; i++) if (heights[i] != NULL) free(heights[i]); free(heights); } /* indel values */ if (! esl_opt_IsOn(go, "--no_indel")) { ESL_ALLOC(ins_P, (hmm->M+1) * sizeof(float)); ESL_ALLOC(ins_expL, (hmm->M+1) * sizeof(float)); ESL_ALLOC(occupancy, (hmm->M+1) * sizeof(float)); hmmlogo_IndelValues(hmm, ins_P, ins_expL, occupancy); printf ("Indel values\n"); for (i = 1; i <= hmm->M; i++) printf("%d: %6.3f %6.3f %6.3f\n", i, ins_P[i], ins_expL[i], occupancy[i] ); free(ins_P); free(ins_expL); free(occupancy); } p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); exit(0); ERROR: if (rel_ents != NULL) free(rel_ents); if (heights != NULL) { for (i = 1; i <= hmm->M; i++) if (heights[i] != NULL) free(heights[i]); free(heights); } if (hfp != NULL) p7_hmmfile_Close(hfp); if (abc != NULL) esl_alphabet_Destroy(abc); if (ins_P != NULL) free(ins_P); if (ins_expL != NULL) free(ins_expL); if (occupancy != NULL) free(occupancy); }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; double lambda = 0.0; double mmu = 0.0; double vmu = 0.0; double ftau = 0.0; int Z = esl_opt_GetInteger(go, "-Z"); int EmL = esl_opt_GetInteger(go, "--EmL"); int EmN = esl_opt_GetInteger(go, "--EmN"); int EvL = esl_opt_GetInteger(go, "--EvL"); int EvN = esl_opt_GetInteger(go, "--EvN"); int EfL = esl_opt_GetInteger(go, "--EfL"); int EfN = esl_opt_GetInteger(go, "--EfN"); int Eft = esl_opt_GetReal (go, "--Eft"); int iteration; int do_msv, do_vit, do_fwd; int status; if (esl_opt_GetBoolean(go, "--msvonly") == TRUE) { do_msv = TRUE; do_vit = FALSE; do_fwd = FALSE; } else if (esl_opt_GetBoolean(go, "--vitonly") == TRUE) { do_msv = FALSE; do_vit = TRUE; do_fwd = FALSE; } else if (esl_opt_GetBoolean(go, "--fwdonly") == TRUE) { do_msv = FALSE; do_vit = FALSE; do_fwd = TRUE; } else { do_msv = TRUE; do_vit = TRUE; do_fwd = TRUE; } if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (bg == NULL) bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL); /* the EvL doesn't matter */ om = p7_oprofile_Create(hmm->M, abc); p7_oprofile_Convert(gm, om); if (esl_opt_IsOn(go, "--lambda")) lambda = esl_opt_GetReal(go, "--lambda"); else p7_Lambda(hmm, bg, &lambda); for (iteration = 0; iteration < Z; iteration++) { if (do_msv) p7_MSVMu (r, om, bg, EmL, EmN, lambda, &mmu); if (do_vit) p7_ViterbiMu (r, om, bg, EvL, EvN, lambda, &vmu); if (do_fwd) p7_Tau (r, om, bg, EfL, EfN, lambda, Eft, &ftau); printf("%s %.4f %.4f %.4f %.4f\n", hmm->name, lambda, mmu, vmu, ftau); } p7_hmm_Destroy(hmm); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); } p7_hmmfile_Close(hfp); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; float vfraw, nullsc, vfscore; float graw, gscore; double P, gP; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); /* allocate DP matrices, both a generic and an optimized one */ ox = p7_omx_Create(gm->M, 0, sq->n); gx = p7_gmx_Create(gm->M, sq->n); /* Useful to place and compile in for debugging: p7_oprofile_Dump(stdout, om); dumps the optimized profile p7_omx_SetDumpMode(ox, TRUE); makes the fast DP algorithms dump their matrices p7_gmx_Dump(stdout, gx); dumps a generic DP matrix */ while ((status = esl_sqio_Read(sqfp, sq)) == eslOK) { p7_oprofile_ReconfigLength(om, sq->n); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_omx_GrowTo(ox, om->M, 0, sq->n); p7_gmx_GrowTo(gx, gm->M, sq->n); p7_ViterbiFilter (sq->dsq, sq->n, om, ox, &vfraw); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); vfscore = (vfraw - nullsc) / eslCONST_LOG2; P = esl_gumbel_surv(vfscore, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]); p7_GViterbi (sq->dsq, sq->n, gm, gx, &graw); gscore = (graw - nullsc) / eslCONST_LOG2; gP = esl_gumbel_surv(gscore, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA]); if (esl_opt_GetBoolean(go, "-1")) { printf("%-30s\t%-20s\t%9.2g\t%7.2f\t%9.2g\t%7.2f\n", sq->name, hmm->name, P, vfscore, gP, gscore); } else if (esl_opt_GetBoolean(go, "-P")) { /* output suitable for direct use in profmark benchmark postprocessors: */ printf("%g\t%.2f\t%s\t%s\n", P, vfscore, sq->name, hmm->name); } else { printf("target sequence: %s\n", sq->name); printf("vit filter raw score: %.2f nats\n", vfraw); printf("null score: %.2f nats\n", nullsc); printf("per-seq score: %.2f bits\n", vfscore); printf("P-value: %g\n", P); printf("GViterbi raw score: %.2f nats\n", graw); printf("GViterbi seq score: %.2f bits\n", gscore); printf("GViterbi P-value: %g\n", gP); } esl_sq_Reuse(sq); } /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc1, sc2; double base_time, bench_time, Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x")) p7_profile_SameAsVF(om, gm); ox = p7_omx_Create(gm->M, 0, 0); gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; /* Run the benchmark */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_ViterbiFilter(dsq, L, om, ox, &sc1); if (esl_opt_GetBoolean(go, "-c")) { p7_GViterbi(dsq, L, gm, gx, &sc2); printf("%.4f %.4f\n", sc1, sc2); } if (esl_opt_GetBoolean(go, "-x")) { p7_GViterbi(dsq, L, gm, gx, &sc2); sc2 /= om->scale_w; if (om->mode == p7_UNILOCAL) sc2 -= 2.0; /* that's ~ L \log \frac{L}{L+2}, for our NN,CC,JJ */ else if (om->mode == p7_LOCAL) sc2 -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */ printf("%.4f %.4f\n", sc1, sc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_ALPHABET *abc = NULL; /* sequence alphabet */ ESL_GETOPTS *go = NULL; /* command line processing */ ESL_RANDOMNESS *r = NULL; /* source of randomness */ P7_HMM *hmm = NULL; /* sampled HMM to emit from */ P7_HMM *core = NULL; /* safe copy of the HMM, before config */ P7_BG *bg = NULL; /* null model */ ESL_SQ *sq = NULL; /* sampled sequence */ P7_TRACE *tr = NULL; /* sampled trace */ P7_PROFILE *gm = NULL; /* profile */ int i,j; int i1,i2; int k1,k2; int iseq; FILE *fp = NULL; double expected; int do_ilocal; char *hmmfile = NULL; int nseq; int do_swlike; int do_ungapped; int L; int M; int do_h2; char *ipsfile = NULL; char *kpsfile = NULL; ESL_DMATRIX *imx = NULL; ESL_DMATRIX *kmx = NULL; ESL_DMATRIX *iref = NULL; /* reference matrix: expected i distribution under ideality */ int Lbins; int status; char errbuf[eslERRBUFSIZE]; /***************************************************************** * Parse the command line *****************************************************************/ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:\n"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2 = indentation; 80=textwidth*/ return eslOK; } do_ilocal = esl_opt_GetBoolean(go, "-i"); hmmfile = esl_opt_GetString (go, "-m"); nseq = esl_opt_GetInteger(go, "-n"); do_swlike = esl_opt_GetBoolean(go, "-s"); do_ungapped = esl_opt_GetBoolean(go, "-u"); L = esl_opt_GetInteger(go, "-L"); M = esl_opt_GetInteger(go, "-M"); do_h2 = esl_opt_GetBoolean(go, "-2"); ipsfile = esl_opt_GetString (go, "--ips"); kpsfile = esl_opt_GetString (go, "--kps"); if (esl_opt_ArgNumber(go) != 0) { puts("Incorrect number of command line arguments."); printf("Usage: %s [options]\n", argv[0]); return eslFAIL; } r = esl_randomness_CreateFast(0); if (hmmfile != NULL) { /* Read the HMM (and get alphabet from it) */ P7_HMMFILE *hfp = NULL; status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); if ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslOK) { if (status == eslEOD) esl_fatal("read failed, HMM file %s may be truncated?", hmmfile); else if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else esl_fatal("Unexpected error in reading HMMs"); } M = hmm->M; p7_hmmfile_Close(hfp); } else { /* Or sample the HMM (create alphabet first) */ abc = esl_alphabet_Create(eslAMINO); if (do_ungapped) p7_hmm_SampleUngapped(r, M, abc, &hmm); else if (do_swlike) p7_hmm_SampleUniform (r, M, abc, 0.05, 0.5, 0.05, 0.2, &hmm); /* tmi, tii, tmd, tdd */ else p7_hmm_Sample (r, M, abc, &hmm); } Lbins = M; imx = esl_dmatrix_Create(Lbins, Lbins); iref = esl_dmatrix_Create(Lbins, Lbins); kmx = esl_dmatrix_Create(M, M); esl_dmatrix_SetZero(imx); esl_dmatrix_SetZero(iref); esl_dmatrix_SetZero(kmx); tr = p7_trace_Create(); sq = esl_sq_CreateDigital(abc); bg = p7_bg_Create(abc); core = p7_hmm_Clone(hmm); if (do_h2) { gm = p7_profile_Create(hmm->M, abc); p7_H2_ProfileConfig(hmm, bg, gm, p7_UNILOCAL); } else { gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); if (p7_hmm_Validate (hmm, NULL, 0.0001) != eslOK) esl_fatal("whoops, HMM is bad!"); if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!"); } /* Sample endpoints. * Also sample an ideal reference distribution for i endpoints. i * endpoints are prone to discretization artifacts, when emitted * sequences have varying lengths. Taking log odds w.r.t. an ideal * reference that is subject to the same discretization artifacts * cancels out the effect. */ for (iseq = 0; iseq < nseq; iseq++) { if (do_ilocal) ideal_local_endpoints (r, core, sq, tr, Lbins, &i1, &i2, &k1, &k2); else profile_local_endpoints(r, core, gm, sq, tr, Lbins, &i1, &i2, &k1, &k2); imx->mx[i1-1][i2-1] += 1.; kmx->mx[k1-1][k2-1] += 1.; /* reference distribution for i */ ideal_local_endpoints (r, core, sq, tr, Lbins, &i1, &i2, &k1, &k2); iref->mx[i1-1][i2-1] += 1.; } /* Adjust both mx's to log_2(obs/exp) ratio */ printf("Before normalization/log-odds:\n"); printf(" i matrix values range from %f to %f\n", dmx_upper_min(imx), dmx_upper_max(imx)); printf(" k matrix values range from %f to %f\n", dmx_upper_min(kmx), dmx_upper_max(kmx)); printf("iref matrix values range from %f to %f\n", dmx_upper_min(iref), dmx_upper_max(iref)); expected = (double) nseq * 2. / (double) (M*(M+1)); for (i = 0; i < kmx->m; i++) for (j = i; j < kmx->n; j++) kmx->mx[i][j] = log(kmx->mx[i][j] / expected) / log(2.0); for (i = 0; i < imx->m; i++) for (j = i; j < imx->m; j++) if (iref->mx[i][j] == 0. && imx->mx[i][j] == 0.) imx->mx[i][j] = 0.; else if (iref->mx[i][j] == 0.) imx->mx[i][j] = eslINFINITY; else if (imx->mx[i][j] == 0.) imx->mx[i][j] = -eslINFINITY; else imx->mx[i][j] = log(imx->mx[i][j] / iref->mx[i][j]) / log(2.0); /* Print ps files */ if (kpsfile != NULL) { if ((fp = fopen(kpsfile, "w")) == NULL) esl_fatal("Failed to open output postscript file %s", kpsfile); dmx_Visualize(fp, kmx, -4., 5.); fclose(fp); } if (ipsfile != NULL) { if ((fp = fopen(ipsfile, "w")) == NULL) esl_fatal("Failed to open output postscript file %s", ipsfile); dmx_Visualize(fp, imx, -4., 5.); /* dmx_Visualize(fp, imx, dmx_upper_min(imx), dmx_upper_max(imx)); */ fclose(fp); } printf("After normalization/log-odds:\n"); printf("i matrix values range from %f to %f\n", dmx_upper_min(imx), dmx_upper_max(imx)); printf("k matrix values range from %f to %f\n", dmx_upper_min(kmx), dmx_upper_max(kmx)); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(core); p7_hmm_Destroy(hmm); p7_trace_Destroy(tr); esl_sq_Destroy(sq); esl_dmatrix_Destroy(imx); esl_dmatrix_Destroy(kmx); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i,j,d,pos; int nsamples = 200; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); if (esl_opt_GetBoolean(go, "-t")) { P7_TRACE *tr = p7_trace_Create(); float *n2sc = malloc(sizeof(float) * (L+1)); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { /* This is approximately what p7_domaindef.c::region_trace_ensemble() is doing: */ for (j = 0; j < nsamples; j++) { p7_StochasticTrace(r, dsq, L, om, ox1, tr); p7_trace_Index(tr); pos = 1; for (d = 0; d < tr->ndom; d++) { p7_Null2_ByTrace(om, tr, tr->tfrom[d], tr->tto[d], ox2, null2); for (; pos <= tr->sqfrom[d]; pos++) n2sc[pos] += 1.0; for (; pos < tr->sqto[d]; pos++) n2sc[pos] += null2[dsq[pos]]; } for (; pos <= L; pos++) n2sc[pos] += 1.0; p7_trace_Reuse(tr); } for (pos = 1; pos <= L; pos++) n2sc[pos] = logf(n2sc[pos] / nsamples); } esl_stopwatch_Stop(w); free(n2sc); p7_trace_Destroy(tr); } else { p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Null2_ByExpectation(om, ox2, null2); esl_stopwatch_Stop(w); } Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }