/* main()
 *
 * Command-line driver for fetching alignments from (or indexing) a
 * multiple alignment file.
 *
 * After option parsing, the alignment file named by argument 1 is opened.
 * Then exactly one of three modes runs:
 *   --index : create_ssi_index() on the open file (exactly 1 argument);
 *   -f      : multifetch(), with argument 2 passed through (2 arguments);
 *   default : onefetch(), with argument 2 passed through (2 arguments).
 *
 * Output goes to the file named by argument 2 when -O is set, else to the
 * file named by -o if given, else to stdout.
 */
int main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = NULL;                 /* application configuration       */
  ESLX_MSAFILE *afp     = NULL;                 /* open alignment file             */
  FILE         *ofp     = NULL;                 /* output stream for alignments    */
  char         *alifile = NULL;                 /* alignment file name             */
  int           infmt   = eslMSAFILE_UNKNOWN;   /* format code for alifile         */
  int           outfmt  = eslMSAFILE_UNKNOWN;   /* output format for fetched msa's */
  int           status;                         /* easel return code               */

  /* ---- Command line ---- */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)
    cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go) != eslOK)
    cmdline_failure(argv[0], "Error in configuration: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h"))
    cmdline_help(argv[0], go);
  if (esl_opt_ArgNumber(go) < 1)
    cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");

  /* Input format is only encoded when the user asked for one. */
  if (esl_opt_IsOn(go, "--informat"))
    {
      infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"));
      if (infmt == eslMSAFILE_UNKNOWN)
        esl_fatal("%s is not a valid input alignment file format for --informat",
                  esl_opt_GetString(go, "--informat"));
    }

  /* Output format is always encoded from --outformat. */
  outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat"));
  if (outfmt == eslMSAFILE_UNKNOWN)
    esl_fatal("%s is not a valid output alignment file format for --outformat",
              esl_opt_GetString(go, "--outformat"));

  alifile = esl_opt_GetArg(go, 1);

  /* ---- Open the alignment file ---- */
  status = eslx_msafile_Open(NULL, alifile, NULL, infmt, NULL, &afp);
  if (status != eslOK)
    eslx_msafile_OpenFailure(afp, status);

  /* ---- Open the SSI index, if any (skipped when we are about to build one) ---- */
  if (! esl_opt_GetBoolean(go, "--index"))
    {
      int from_file = (afp->bf->mode_is == eslBUFFER_FILE    ||
                       afp->bf->mode_is == eslBUFFER_ALLFILE ||
                       afp->bf->mode_is == eslBUFFER_MMAP);

      if (from_file)
        {
          char *ssifile = NULL;

          esl_sprintf(&ssifile, "%s.ssi", afp->bf->filename);
          status = esl_ssi_Open(ssifile, &(afp->ssi));
          switch (status)
            {
            case eslOK:
              break;
            case eslERANGE:
              esl_fatal("SSI index %s has 64-bit offsets; this system doesn't support them", ssifile);
              break;
            case eslEFORMAT:
              esl_fatal("SSI index %s has an unrecognized format. Try recreating, w/ esl-afetch --index", ssifile);
              break;
            case eslENOTFOUND:
              afp->ssi = NULL;   /* a missing index is not an error here */
              break;
            default:
              esl_fatal("SSI index %s: open failed, error code %d\n", ssifile, status);
              break;
            }
          free(ssifile);
        }
    }

  /* ---- Open the output file, if any ---- */
  if (esl_opt_GetBoolean(go, "-O"))
    {
      ofp = fopen(esl_opt_GetArg(go, 2), "w");
      if (ofp == NULL)
        esl_fatal("Failed to open output file %s\n", esl_opt_GetArg(go, 2));
    }
  else if (esl_opt_GetString(go, "-o") != NULL)
    {
      ofp = fopen(esl_opt_GetString(go, "-o"), "w");
      if (ofp == NULL)
        esl_fatal("Failed to open output file %s\n", esl_opt_GetString(go, "-o"));
    }
  else
    {
      ofp = stdout;
    }

  /* ---- Hand off control flow as appropriate ---- */
  if (esl_opt_GetBoolean(go, "--index"))
    {
      if (esl_opt_ArgNumber(go) != 1)
        cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");
      create_ssi_index(go, afp);
    }
  else if (esl_opt_GetBoolean(go, "-f"))
    {
      if (esl_opt_ArgNumber(go) != 2)
        cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");
      multifetch(go, ofp, outfmt, esl_opt_GetArg(go, 2), afp);
    }
  else
    {
      if (esl_opt_ArgNumber(go) != 2)
        cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");
      onefetch(go, ofp, outfmt, esl_opt_GetArg(go, 2), afp);
      /* Only chat on stdout when the alignment itself went somewhere else. */
      if (ofp != stdout)
        printf("\n\nRetrieved alignment %s.\n", esl_opt_GetArg(go, 2));
    }

  eslx_msafile_Close(afp);
  esl_getopts_Destroy(go);
  exit(0);
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc, accscore; float fsc_g, bsc_g, accscore_g; double Mcs; p7_FLogsumInit(); if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); tr = p7_trace_CreateWithPP(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_OptimalAccuracy(om, ox2, ox1, &accscore); if (! 
esl_opt_GetBoolean(go, "--notrace")) { p7_OATrace(om, ox2, ox1, tr); p7_trace_Reuse(tr); } } esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x") ) { gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); p7_GForward (dsq, L, gm, gx1, &fsc_g); p7_GBackward(dsq, L, gm, gx2, &bsc_g); p7_GDecoding(gm, gx1, gx2, gx2); p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g); printf("generic: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc_g, bsc_g, accscore_g); printf("VMX: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc, bsc, accscore); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); } free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_trace_Destroy(tr); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* main()
 *
 * Example driver: reads one HMM and one sequence, runs Forward, Backward,
 * posterior decoding, the optimal-accuracy fill and its traceback, then
 * prints the trace and the three scores.
 *
 * -d dumps the posterior decoding matrix (ox2); -m dumps the OA matrix (ox1).
 */
int main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmmfile = esl_opt_GetArg(go, 1);
  char         *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc     = NULL;
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  P7_OPROFILE  *om      = NULL;
  P7_OMX       *ox1     = NULL;
  P7_OMX       *ox2     = NULL;
  P7_GMX       *gx      = NULL;
  ESL_SQ       *sq      = NULL;
  ESL_SQFILE   *sqfp    = NULL;
  P7_TRACE     *tr      = NULL;
  int           format  = eslSQFILE_UNKNOWN;
  char          errbuf[eslERRBUFSIZE];
  float         fsc, bsc;
  float         accscore;
  int           status;

  /* Read in one HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK)
    p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK)
    p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  /* Read in one sequence */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp);
  switch (status)
    {
    case eslOK:        break;
    case eslENOTFOUND: p7_Fail("No such file.");                   break;
    case eslEFORMAT:   p7_Fail("Format unrecognized.");            break;
    case eslEINVAL:    p7_Fail("Can't autodetect stdin or .gz.");  break;
    default:           p7_Fail("Open failed, code %d.", status);   break;
    }
  if (esl_sqio_Read(sqfp, sq) != eslOK)
    p7_Fail("Failed to read sequence");
  esl_sqfile_Close(sqfp);

  /* Configure a profile from the HMM */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);   /* multihit local: H3 default */
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* Allocations */
  ox1 = p7_omx_Create(gm->M, sq->n, sq->n);
  ox2 = p7_omx_Create(gm->M, sq->n, sq->n);
  gx  = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_CreateWithPP();
  p7_FLogsumInit();

  /* Run Forward, Backward; do OA fill and trace */
  p7_Forward (sq->dsq, sq->n, om, ox1,      &fsc);
  p7_Backward(sq->dsq, sq->n, om, ox1, ox2, &bsc);
  p7_Decoding(om, ox1, ox2, ox2);                   /* <ox2> is now the posterior decoding matrix */
  p7_OptimalAccuracy(om, ox2, ox1, &accscore);      /* <ox1> is now the OA matrix                 */
  p7_OATrace(om, ox2, ox1, tr);

  /* Optional matrix dumps go through the generic matrix <gx>. */
  if (esl_opt_GetBoolean(go, "-d"))
    {
      p7_omx_FDeconvert(ox2, gx);
      p7_gmx_Dump(stdout, gx);
    }
  if (esl_opt_GetBoolean(go, "-m"))
    {
      p7_omx_FDeconvert(ox1, gx);
      p7_gmx_Dump(stdout, gx);
    }

  p7_trace_Dump(stdout, tr, gm, sq->dsq);
  if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK)
    p7_Die("trace fails validation:\n%s\n", errbuf);

  printf("fwd = %.4f nats\n", fsc);
  printf("bck = %.4f nats\n", bsc);
  printf("acc = %.4f (%.2f%%)\n", accscore, accscore * 100. / (float) sq->n);

  /* Cleanup */
  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_omx_Destroy(ox1);
  p7_omx_Destroy(ox2);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); char *hmmfile = esl_opt_GetArg(go, 1); char *qfile = esl_opt_GetArg(go, 2); ESL_SQ *qsq = esl_sq_CreateDigital(abc); ESL_SQFILE *qfp = NULL; FILE *hmmfp = NULL; ESL_SCOREMATRIX *S = esl_scorematrix_Create(abc); ESL_DMATRIX *Q = NULL; P7_BG *bg = p7_bg_Create(abc); P7_HMM *hmm = NULL; double *fa = NULL; double popen = esl_opt_GetReal (go, "-q"); double pextend = esl_opt_GetReal (go, "-r"); char *mxfile = esl_opt_GetString(go, "-m"); char errbuf[eslERRBUFSIZE]; double slambda; int a,b; int status; /* Reverse engineer a scoring matrix to obtain conditional prob's * that we'll use for the single-seq query HMM. Because score mx is * symmetric, we can set up P[a][b] = P(b | a), so we can use the * matrix rows as HMM match emission vectors. This means dividing * the joint probs through by f_a. */ if (mxfile == NULL) { if (esl_scorematrix_Set("BLOSUM62", S) != eslOK) esl_fatal("failed to set BLOSUM62 scores"); } else { ESL_FILEPARSER *efp = NULL; if ( esl_fileparser_Open(mxfile, NULL, &efp) != eslOK) esl_fatal("failed to open score file %s", mxfile); if ( esl_scorematrix_Read(efp, abc, &S) != eslOK) esl_fatal("failed to read matrix from %s", mxfile); esl_fileparser_Close(efp); } /* A wasteful conversion of the HMMER single-precision background probs to Easel double-prec */ ESL_ALLOC(fa, sizeof(double) * bg->abc->K); esl_vec_F2D(bg->f, bg->abc->K, fa); /* Backcalculate joint probabilities Q, given score matrix S and background frequencies fa */ status = esl_scorematrix_ProbifyGivenBG(S, fa, fa, &slambda, &Q); if (status == eslEINVAL) esl_fatal("built-in score matrix %s has no valid solution for lambda", matrix); else if (status == eslENOHALT) esl_fatal("failed to solve score matrix %s for lambda", matrix); else if (status != eslOK) esl_fatal("unexpected error in solving score matrix %s for probability parameters", 
matrix); esl_scorematrix_JointToConditionalOnQuery(abc, Q); /* Open the query sequence file in FASTA format */ status = esl_sqfile_Open(qfile, eslSQFILE_FASTA, NULL, &qfp); if (status == eslENOTFOUND) esl_fatal("No such file %s.", qfile); else if (status == eslEFORMAT) esl_fatal("Format of %s unrecognized.", qfile); else if (status == eslEINVAL) esl_fatal("Can't autodetect stdin or .gz."); else if (status != eslOK) esl_fatal("Open of %s failed, code %d.", qfile, status); /* Open the output HMM file */ if ((hmmfp = fopen(hmmfile, "w")) == NULL) esl_fatal("Failed to open output HMM file %s", hmmfile); /* For each sequence, build a model and save it. */ while ((status = esl_sqio_Read(qfp, qsq)) == eslOK) { p7_Seqmodel(abc, qsq->dsq, qsq->n, qsq->name, Q, bg->f, popen, pextend, &hmm); if ( p7_hmm_Validate(hmm, errbuf, 1e-5) != eslOK) esl_fatal("HMM validation failed: %s\n", errbuf); if ( p7_hmmfile_WriteASCII(hmmfp, -1, hmm) != eslOK) esl_fatal("HMM save failed"); p7_hmm_Destroy(hmm); } if (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s line %" PRId64 "):\n%s\n", qfp->filename, qfp->linenumber, qfp->errbuf); else if (status != eslEOF) esl_fatal("Unexpected error %d reading sequence file %s", status, qfp->filename); esl_dmatrix_Destroy(Q); esl_scorematrix_Destroy(S); free(fa); free(fb); esl_sq_Destroy(qsq); esl_sqfile_Close(qfp); fclose(hmmfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_STOPWATCH *w; ESL_GETOPTS *go; char *msafile; ESL_MSAFILE *afp; ESL_MSA *msa; int do_gsc; int do_pb; int do_blosum; int maxN; double maxid; double cpu; /* Process command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("failed to parse cmd line: %s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } do_blosum = esl_opt_GetBoolean(go, "--blosum"); do_gsc = esl_opt_GetBoolean(go, "--gsc"); do_pb = esl_opt_GetBoolean(go, "--pb"); maxid = esl_opt_GetReal (go, "--id"); maxN = esl_opt_GetInteger(go, "--maxN"); if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } if ((msafile = esl_opt_GetArg(go, 1)) == NULL) esl_fatal("failed to parse cmd line: %s", go->errbuf); esl_getopts_Destroy(go); w = esl_stopwatch_Create(); /* Weight one or more alignments from input file */ esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp); while (esl_msa_Read(afp, &msa) == eslOK) { if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; } esl_stopwatch_Start(w); if (do_gsc) esl_msaweight_GSC(msa); else if (do_pb) esl_msaweight_PB(msa); else if (do_blosum) esl_msaweight_BLOSUM(msa, maxid); esl_stopwatch_Stop(w); cpu = w->user; printf("%-20s %6d %6d %.3f\n", msa->name, msa->alen, msa->nseq, cpu); esl_msa_Destroy(msa); } esl_msafile_Close(afp); esl_stopwatch_Destroy(w); return eslOK; }
/* main()
 *
 * Example driver for the null-model filter score.
 *
 * Reads one HMM (argument 1), configures a null model's filter from the
 * HMM's length and composition, prints the entropy of the background and
 * of the model composition, and then, for every sequence in argument 2,
 * prints the null score, the filter score and their difference.
 */
int main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmmfile = esl_opt_GetArg(go, 1);
  char         *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc     = NULL;
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  ESL_SQFILE   *sqfp    = NULL;
  ESL_SQ       *sq      = NULL;
  int           format  = eslSQFILE_UNKNOWN;
  float         nullsc, filtersc, H;
  int           status;

  /* Read one HMM from <hmmfile> */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK)
    p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK)
    p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  /* Open <seqfile> for reading */
  status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp);
  switch (status)
    {
    case eslOK:        break;
    case eslENOTFOUND: esl_fatal("No such file.");                 break;
    case eslEFORMAT:   esl_fatal("Format unrecognized.");          break;
    default:           esl_fatal("Open failed, code %d.", status); break;
    }

  sq = esl_sq_CreateDigital(abc);
  bg = p7_bg_Create(abc);
  p7_bg_SetFilter(bg, hmm->M, hmm->compo);

  /* Entropy of the iid background, then of the model's composition. */
  H = esl_vec_FEntropy(bg->f, bg->abc->K);
  printf("bg iid H = %.4f\n", H);

  H = esl_vec_FEntropy(hmm->compo, bg->abc->K);
  printf("modelcomp H = %.4f\n", H);

  /* Score each sequence under the null model and under the filter. */
  for (;;)
    {
      status = esl_sqio_Read(sqfp, sq);
      if (status != eslOK) break;

      p7_bg_SetLength(bg, sq->n);
      p7_bg_NullOne    (bg, sq->dsq, sq->n, &nullsc);
      p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc);

      printf("%-20s %5d %8.5f %8.5f %8.5f\n", sq->name, (int) sq->n, nullsc, filtersc, filtersc-nullsc);

      esl_sq_Reuse(sq);
    }
  /* The loop ends cleanly only on end-of-file. */
  if (status == eslEFORMAT)
    esl_fatal("Parse failed (sequence file %s)\n%s\n", sqfp->filename, sqfp->get_error(sqfp));
  else if (status != eslEOF)
    esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename);

  /* Cleanup */
  esl_sqfile_Close(sqfp);
  esl_sq_Destroy(sq);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc, fsc, vsc; float bestsc = -eslINFINITY; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); fwd = p7_omx_Create(gm->M, L, L); gx = p7_gmx_Create(gm->M, L); tr = p7_trace_Create(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GViterbi(dsq, L, gm, gx, &vsc); p7_Forward (dsq, L, om, fwd, &fsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_StochasticTrace(r, dsq, L, om, fwd, tr); p7_trace_Score(tr, dsq, gm, &sc); bestsc = ESL_MAX(bestsc, sc); p7_trace_Reuse(tr); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("forward sc = %.4f nats\n", fsc); printf("viterbi sc = %.4f nats\n", vsc); printf("max trace sc = %.4f nats\n", bestsc); free(dsq); p7_trace_Destroy(tr); p7_gmx_Destroy(gx); p7_omx_Destroy(fwd); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* main()
 *
 * Example driver for the Forward/Backward parsers.
 *
 * Reads one HMM (argument 1) and, for each sequence in argument 2, runs
 * the optimized Forward and Backward parsers and the generic Forward and
 * Backward implementations, then prints scores and P-values.
 *
 * Output layout is selected by -1 (one tab-separated line per sequence),
 * -P (line suitable for profmark postprocessors), or neither (verbose).
 *
 * Fix relative to the previous version: the status that ends the read
 * loop was never inspected, so a parse or I/O error was silently treated
 * as end-of-file. It is now reported unless it is eslEOF.
 */
int main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmmfile = esl_opt_GetArg(go, 1);
  char         *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc     = NULL;
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  P7_OPROFILE  *om      = NULL;
  P7_GMX       *gx      = NULL;
  P7_OMX       *fwd     = NULL;
  P7_OMX       *bck     = NULL;
  ESL_SQ       *sq      = NULL;
  ESL_SQFILE   *sqfp    = NULL;
  int           format  = eslSQFILE_UNKNOWN;
  float         fraw, braw, nullsc, fsc;
  float         gfraw, gbraw, gfsc;
  double        P, gP;
  int           status;

  /* Read in one HMM */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  /* Open sequence file for reading.
   * NOTE(review): <sq> is created digital but the file is opened with
   * esl_sqfile_Open() rather than esl_sqfile_OpenDigital() as the sibling
   * drivers do -- confirm this is intended.
   */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* create default null model, then create and optimize profile.
   * No sequence has been read yet, so <sq->n> here is whatever a freshly
   * created sequence holds (presumably 0 -- TODO confirm); lengths are
   * reconfigured per sequence inside the loop.
   */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* p7_oprofile_Dump(stdout, om);  */

  /* allocate DP matrices for O(M+L) parsers */
  fwd = p7_omx_Create(gm->M, 0, sq->n);
  bck = p7_omx_Create(gm->M, 0, sq->n);
  gx  = p7_gmx_Create(gm->M, sq->n);

  /* allocate DP matrices for O(ML) fills */
  /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */
  /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */

  /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */   /* makes the fast DP algorithms dump their matrices */
  /* p7_omx_SetDumpMode(stdout, bck, TRUE); */

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      /* Resize everything to this sequence's length. */
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm, sq->n);
      p7_bg_SetLength(bg, sq->n);
      p7_omx_GrowTo(fwd, om->M, 0, sq->n);
      p7_omx_GrowTo(bck, om->M, 0, sq->n);
      p7_gmx_GrowTo(gx,  gm->M, sq->n);

      p7_bg_NullOne    (bg, sq->dsq, sq->n, &nullsc);

      p7_ForwardParser (sq->dsq, sq->n, om, fwd,      &fraw);
      p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw);

      /* p7_Forward (sq->dsq, sq->n, om, fwd, &fsc); printf("forward: %.2f nats\n", fsc); */
      /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc); printf("backward: %.2f nats\n", bsc); */

      /* Comparison to other F/B implementations */
      p7_GForward (sq->dsq, sq->n, gm, gx, &gfraw);
      p7_GBackward(sq->dsq, sq->n, gm, gx, &gbraw);

      /* p7_gmx_Dump(stdout, gx, p7_DEFAULT); */

      /* Raw score minus null score, divided by log 2. */
      fsc  = (fraw-nullsc)  / eslCONST_LOG2;
      gfsc = (gfraw-nullsc) / eslCONST_LOG2;
      P  = esl_exp_surv(fsc,  om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
      gP = esl_exp_surv(gfsc, gm->evparam[p7_FTAU], gm->evparam[p7_FLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
        {
          printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc);
        }
      else if (esl_opt_GetBoolean(go, "-P"))
        { /* output suitable for direct use in profmark benchmark postprocessors: */
          printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name);
        }
      else
        {
          printf("target sequence: %s\n", sq->name);
          printf("fwd filter raw score: %.2f nats\n", fraw);
          printf("bck filter raw score: %.2f nats\n", braw);
          printf("null score: %.2f nats\n", nullsc);
          printf("per-seq score: %.2f bits\n", fsc);
          printf("P-value: %g\n", P);
          printf("GForward raw score: %.2f nats\n", gfraw);
          printf("GBackward raw score: %.2f nats\n", gbraw);
          printf("GForward seq score: %.2f bits\n", gfsc);
          printf("GForward P-value: %g\n", gP);
        }

      esl_sq_Reuse(sq);
    }
  /* Only a clean end-of-file is an acceptable way to leave the loop. */
  if      (status == eslEFORMAT) p7_Fail("Parse failed (sequence file %s)", seqfile);
  else if (status != eslEOF)     p7_Fail("Unexpected error %d reading sequence file %s", status, seqfile);

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(bck);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; float fsc2, bsc2; double base_time, bench_time, Mcs; p7_FLogsumInit(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); if (esl_opt_GetBoolean(go, "-P")) { fwd = p7_omx_Create(gm->M, 0, L); bck = p7_omx_Create(gm->M, 0, L); } else { fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); } gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); if (esl_opt_GetBoolean(go, "-P")) { if (! esl_opt_GetBoolean(go, "-B")) p7_ForwardParser (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_BackwardParser(dsq, L, om, fwd, bck, &bsc); } else { if (! 
esl_opt_GetBoolean(go, "-B")) p7_Forward (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_Backward(dsq, L, om, fwd, bck, &bsc); } if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x")) { p7_GForward (dsq, L, gm, gx, &fsc2); p7_GBackward(dsq, L, gm, gx, &bsc2); printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* main()
 *
 * Command-line driver that masks regions of sequences.
 *
 * Argument 1 is a sequence file, argument 2 a mask coordinate file whose
 * data lines hold <seqname> <start> <end> (1-based coordinates; '#' starts
 * a comment). For each mask line, the matching sequence is obtained --
 * by SSI fetch with -R, otherwise by reading the next sequence and
 * requiring its name or accession to match -- masked, and written in
 * FASTA format to -o's file or stdout.
 *
 * Masking replaces alphabetic residues with the -m character (default
 * 'X'), or lowercases them with -l. -r masks everything outside
 * start..end instead. -x extends the masked region by that many residues.
 *
 * Fixes relative to the previous version:
 *  - the "--informat" failure message had a "%s" with no matching
 *    argument (undefined behavior); the option string is now passed;
 *  - start/end coordinates are validated after strtoll() instead of
 *    silently becoming -1 on a non-numeric token;
 *  - <ctype.h> calls receive an unsigned char value, as the C standard
 *    requires for characters outside the basic range.
 */
int main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = NULL;               /* application configuration       */
  char           *seqfile  = NULL;               /* sequence file name              */
  char           *maskfile = NULL;               /* mask coordinate file name       */
  int             infmt    = eslSQFILE_UNKNOWN;  /* format code for seqfile         */
  int             outfmt   = eslSQFILE_FASTA;    /* format code for output seqs     */
  ESL_SQFILE     *sqfp     = NULL;               /* open sequence file              */
  ESL_FILEPARSER *maskefp  = NULL;               /* open mask coord file            */
  FILE           *ofp      = NULL;               /* output stream for masked seqs   */
  char           *source   = NULL;               /* name of current seq to mask     */
  char           *tok      = NULL;               /* current coordinate token        */
  char           *endp     = NULL;               /* strtoll() end pointer           */
  int64_t         start, end;                    /* start, end coord for masking    */
  int64_t         i, j, pos;                     /* coords in a sequence            */
  int64_t         overmask;                      /* # of extra residues to mask     */
  ESL_SQ         *sq       = esl_sq_Create();    /* current sequence                */
  int             do_fetching;
  int             do_lowercase;
  int             maskchar;
  int             status;                        /* easel return code               */

  /****************************************************************************
   * Parse command line
   ****************************************************************************/

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
  if (esl_opt_ArgNumber(go) != 2)                      cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");

  do_fetching  = esl_opt_GetBoolean(go, "-R");
  do_lowercase = esl_opt_GetBoolean(go, "-l");
  overmask     = (esl_opt_IsOn(go, "-x") ? esl_opt_GetInteger(go, "-x") : 0);
  maskchar     = (esl_opt_IsOn(go, "-m") ? esl_opt_GetChar(go, "-m")    : 'X');

  seqfile  = esl_opt_GetArg(go, 1);
  maskfile = esl_opt_GetArg(go, 2);

  /* Open the <seqfile>: text mode, not digital */
  if (esl_opt_GetString(go, "--informat") != NULL)
    {
      infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
      if (infmt == eslSQFILE_UNKNOWN)
        cmdline_failure(argv[0], "%s is not a valid input sequence file format for --informat",
                        esl_opt_GetString(go, "--informat"));
    }
  status = esl_sqfile_Open(seqfile, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) cmdline_failure(argv[0], "Sequence file %s not found.\n", seqfile);
  else if (status == eslEFORMAT)   cmdline_failure(argv[0], "Format of file %s unrecognized.\n", seqfile);
  else if (status == eslEINVAL)    cmdline_failure(argv[0], "Can't autodetect stdin or .gz.\n");
  else if (status != eslOK)        cmdline_failure(argv[0], "Open failed, code %d.\n", status);

  if (do_fetching && sqfp->data.ascii.ssi == NULL)
    cmdline_failure(argv[0], "-R option (random access/fetching) requires %s to be SSI indexed\n", seqfile);

  /* Open the <maskfile> */
  if (esl_fileparser_Open(maskfile, NULL, &maskefp) != eslOK)
    cmdline_failure(argv[0], "Failed to open mask coordinate file %s\n", maskfile);
  esl_fileparser_SetCommentChar(maskefp, '#');

  /* Open the output file, if any */
  if (esl_opt_GetString(go, "-o") != NULL)
    {
      if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
        cmdline_failure(argv[0], "Failed to open output file %s\n", esl_opt_GetString(go, "-o"));
    }
  else ofp = stdout;

  /****************************************************************************
   * Main loop over lines in <maskfile>
   ****************************************************************************/

  /* Read one data line at a time from the <maskfile>;
   * parse into data fields <seqname> <start> <end>
   */
  while (esl_fileparser_NextLine(maskefp) == eslOK)
    {
      /* First field is sequence name */
      if (esl_fileparser_GetTokenOnLine(maskefp, &source, NULL) != eslOK)
        esl_fatal("Failed to read source seq name on line %d of file %s\n", maskefp->linenumber, maskfile);

      /* Get the sequence */
      if (do_fetching)
        { /* If the <seqfile> is SSI indexed, try to reposition it and read <source> seq by random access */
          status = esl_sqio_Fetch(sqfp, source, sq);
          if      (status == eslENOTFOUND) esl_fatal("seq %s not found in SSI index for file %s\n", source, sqfp->filename);
          else if (status == eslEINVAL)    esl_fatal("No SSI index or can't reposition in file %s\n", sqfp->filename);
          else if (status == eslEFORMAT)   esl_fatal("Parse failed:\n%s\n", esl_sqfile_GetErrorBuf(sqfp));
          else if (status != eslOK)        esl_fatal("Unexpected failure in fetching %s from file %s\n", source, sqfp->filename);
        }
      else
        { /* else, assume we're reading sequentially; <sqfile> and <maskfile> have seqs in same order */
          status = esl_sqio_Read(sqfp, sq);
          if      (status == eslEOF)     esl_fatal("File %s ended prematurely; didn't find %s\n", sqfp->filename, source);
          else if (status == eslEFORMAT) esl_fatal("Parse failed:\n%s\n", esl_sqfile_GetErrorBuf(sqfp));
          else if (status != eslOK)      esl_fatal("Unexpected error reading sequence file %s\n", sqfp->filename);

          if ((strcmp(sq->name, source) != 0) && (strcmp(sq->acc, source) != 0))
            esl_fatal("Sequences in <sqfile> and <maskfile> aren't in same order; try -R");
        }

      /* If we're masking by lowercase, first make sure everything's uppercase */
      if (do_lowercase)
        for (pos = 0; pos < sq->n; pos++)
          if (isalpha((unsigned char) sq->seq[pos]))
            sq->seq[pos] = toupper((unsigned char) sq->seq[pos]);

      /* Next two fields are <start>, <end> for the masking */
      /* possible future extension: wrap loop around this, enable multiple masked regions */
      if (esl_fileparser_GetTokenOnLine(maskefp, &tok, NULL) != eslOK)
        esl_fatal("Failed to read start coord on line %d of file %s\n", maskefp->linenumber, maskfile);
      start = strtoll(tok, &endp, 0) - 1;      /* file coords are 1-based; internal coords 0-based */
      if (endp == tok || *endp != '\0')
        esl_fatal("Start coord %s on line %d of file %s is not an integer\n", tok, maskefp->linenumber, maskfile);

      if (esl_fileparser_GetTokenOnLine(maskefp, &tok, NULL) != eslOK)
        esl_fatal("Failed to read end coord on line %d of file %s\n", maskefp->linenumber, maskfile);
      end = strtoll(tok, &endp, 0) - 1;
      if (endp == tok || *endp != '\0')
        esl_fatal("End coord %s on line %d of file %s is not an integer\n", tok, maskefp->linenumber, maskfile);

      /* Do the masking */
      if (esl_opt_GetBoolean(go, "-r")) /* Reverse masking */
        { /* leave start..end unmasked; mask prefix 0..start-1, end+1..L-1 */
          i = 0;
          j = ESL_MIN(sq->n-1, start - 1 + overmask);
          for (pos = i; pos <= j; pos++)
            if (isalpha((unsigned char) sq->seq[pos]))
              sq->seq[pos] = (do_lowercase ? tolower((unsigned char) sq->seq[pos]) : maskchar);

          i = ESL_MAX(0, end + 1 - overmask);
          j = sq->n-1;
          for (pos = i; pos <= j; pos++)
            if (isalpha((unsigned char) sq->seq[pos]))
              sq->seq[pos] = (do_lowercase ? tolower((unsigned char) sq->seq[pos]) : maskchar);
        }
      else
        { /* normal: mask start..end */
          i = ESL_MAX(0,       start - overmask);
          j = ESL_MIN(sq->n-1, end   + overmask);
          for (pos = i; pos <= j; pos++)
            if (isalpha((unsigned char) sq->seq[pos]))
              sq->seq[pos] = (do_lowercase ? tolower((unsigned char) sq->seq[pos]) : maskchar);
        }

      esl_sqio_Write(ofp, sq, outfmt, FALSE);
      esl_sq_Reuse(sq);
    }

  esl_sq_Destroy(sq);
  esl_fileparser_Close(maskefp);
  esl_sqfile_Close(sqfp);
  esl_getopts_Destroy(go);
  if (ofp != stdout) fclose(ofp);
  return 0;
}
/* main()
 *
 * Entry point of the sequence fetching driver. Parses the command line,
 * opens <seqfile> (argument 1) and the output stream, then runs one of
 * three modes:
 *   --index : build an SSI index with create_ssi_index(); takes 1 argument
 *   -f      : fetch a list of sequences (multifetch), or of subsequences
 *             when -C is also set (multifetch_subseq); takes 2 arguments
 *   default : fetch a single sequence named by argument 2, or a single
 *             subsequence when -c <from>..<to> is given; takes 2 arguments
 *
 * Returns 0 on success. All failures are reported through cmdline_failure()
 * or esl_fatal().
 * NOTE(review): the code (as before) continues to use <sqfp>/<ofp> right
 * after those calls, so it presumably relies on them not returning - confirm.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS *go      = NULL;               /* application configuration   */
  char        *seqfile = NULL;               /* sequence file name          */
  int          infmt   = eslSQFILE_UNKNOWN;  /* format code for seqfile     */
  ESL_SQFILE  *sqfp    = NULL;               /* open sequence file          */
  FILE        *ofp     = NULL;               /* output stream for sequences */
  int          status;                       /* easel return code           */

  /***********************************************
   * Parse command line
   ***********************************************/
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
  if (esl_opt_ArgNumber(go) < 1)                       cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");

  /* Open the sequence file */
  seqfile = esl_opt_GetArg(go, 1);
  if (esl_opt_GetString(go, "--informat") != NULL) {
    infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslSQFILE_UNKNOWN)
      esl_fatal("%s is not a valid input sequence file format for --informat", esl_opt_GetString(go, "--informat"));
  }
  status = esl_sqfile_Open(seqfile, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) cmdline_failure(argv[0], "Sequence file %s not found.\n", seqfile);
  else if (status == eslEFORMAT)   cmdline_failure(argv[0], "Format of file %s unrecognized.\n", seqfile);
  else if (status == eslEINVAL)    cmdline_failure(argv[0], "Can't autodetect stdin or .gz.\n");
  else if (status != eslOK)        cmdline_failure(argv[0], "Open failed, code %d.\n", status);

  /* Open the output file, if any */
  if (esl_opt_GetBoolean(go, "-O"))
    {
      /* -O names the output file after argument 2, so that argument must
       * exist before it is handed to fopen(). Only ">= 1 argument" has been
       * verified at this point.
       */
      if (esl_opt_ArgNumber(go) < 2)
        cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");
      if ((ofp = fopen(esl_opt_GetArg(go, 2), "w")) == NULL)
        cmdline_failure(argv[0], "Failed to open output file %s\n", esl_opt_GetArg(go, 2));
    }
  else if (esl_opt_GetString(go, "-o") != NULL)
    {
      if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
        cmdline_failure(argv[0], "Failed to open output file %s\n", esl_opt_GetString(go, "-o"));
    }
  else ofp = stdout;

  /* Indexing mode */
  if (esl_opt_GetBoolean(go, "--index"))
    {
      if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");
      if (sqfp->data.ascii.do_gzip)   cmdline_failure(argv[0], "Can't index a .gz compressed file");
      if (sqfp->data.ascii.do_stdin)  cmdline_failure(argv[0], "Can't index a standard input pipe");

      create_ssi_index(go, sqfp);
    }

  /* List retrieval mode */
  else if (esl_opt_GetBoolean(go, "-f"))
    {
      if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");

      /* Open the SSI index for retrieval; skipped for gzip, stdin, and alignment-format input */
      if (! sqfp->data.ascii.do_gzip && ! sqfp->data.ascii.do_stdin && ! esl_sqio_IsAlignment(sqfp->format))
        {
          status = esl_sqfile_OpenSSI(sqfp, NULL);
          if      (status == eslEFORMAT) cmdline_failure(argv[0], "SSI index is in incorrect format\n");
          else if (status == eslERANGE)  cmdline_failure(argv[0], "SSI index is in 64-bit format and we can't read it\n");
          else if (status != eslOK)      cmdline_failure(argv[0], "Failed to open SSI index\n");
        }

      if (esl_opt_GetBoolean(go, "-C")) multifetch_subseq(go, ofp, esl_opt_GetArg(go, 2), sqfp);
      else                              multifetch       (go, ofp, esl_opt_GetArg(go, 2), sqfp);
    }

  /* Single sequence retrieval mode */
  else
    {
      if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");

      char *key     = esl_opt_GetArg(go, 2);
      char *cstring = esl_opt_GetString(go, "-c");
      char *newname = esl_opt_GetString(go, "-n");

      /* Open the SSI index for retrieval; skipped for gzip, stdin, and alignment-format input */
      if (! sqfp->data.ascii.do_gzip && ! sqfp->data.ascii.do_stdin && ! esl_sqio_IsAlignment(sqfp->format))
        {
          status = esl_sqfile_OpenSSI(sqfp, NULL);
          if      (status == eslEFORMAT) cmdline_failure(argv[0], "SSI index is in incorrect format\n");
          else if (status == eslERANGE)  cmdline_failure(argv[0], "SSI index is in 64-bit format and we can't read it\n");
          else if (status != eslOK)      cmdline_failure(argv[0], "Failed to open SSI index\n");
        }

      /* -c: subsequence retrieval; else full sequence retrieval */
      if (cstring != NULL)
        {
          uint32_t start, end;

          status = esl_regexp_ParseCoordString(cstring, &start, &end);
          if (status == eslESYNTAX) esl_fatal("-c takes arg of subseq coords <from>..<to>; %s not recognized", cstring);
          if (status == eslFAIL)    esl_fatal("Failed to find <from> or <to> coord in %s", cstring);

          onefetch_subseq(go, ofp, sqfp, newname, key, start, end);
          /* start/end are unsigned 32-bit: print them with an unsigned conversion */
          if (ofp != stdout)
            printf("\n\nRetrieved subsequence %s/%u-%u.\n", key, (unsigned int) start, (unsigned int) end);
        }
      else
        {
          onefetch(go, ofp, key, sqfp);
          if (ofp != stdout) printf("\n\nRetrieved sequence %s.\n", key);
        }
    }

  /* Close the output file if we opened one, so buffered output is flushed explicitly */
  if (ofp != stdout) fclose(ofp);
  esl_sqfile_Close(sqfp);
  esl_getopts_Destroy(go);
  return 0;
}
/* seq_shuffling()
 * SRE, Tue Jan 22 08:35:51 2008 [Market Street Cafe, Leesburg]
 *
 * Shuffling of input sequences.
 *
 * Reads every sequence from the file named by command line argument 1,
 * and for each one writes N (option -N) shuffled/randomized versions to
 * <ofp> in format <outfmt>. The kind of shuffling is selected by the
 * first option that is set among -m, -d, -k, -0, -1, -r, -w.
 *
 * Fixed-length (L>0) vs. full-length (L=0) modes handled differently.
 * In fixed-length mode:
 *   <shuff->seq> only needs to be allocated once, for L
 *   <targ> is an allocated copy of a random subseq of length L
 *   sequences < L residues long can't be shuffled
 * In full-length mode:
 *   <shuff->seq> is grown to length <sq->n> for each input seq
 *   <targ> just points to <sq->seq>
 *
 * Output names are "<name>-shuffled-<i>" when N > 1, else "<name>-shuffled".
 *
 * Returns <eslOK> on success. Open and parse failures are reported with
 * esl_fatal(). If control reaches the ERROR label (presumably from a failed
 * ESL_ALLOC - confirm against the macro), resources are released and
 * <status> is returned.
 */
static int
seq_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char       *seqfile = esl_opt_GetArg(go, 1);
  int         infmt   = eslSQFILE_UNKNOWN;
  ESL_SQFILE *sqfp    = NULL;
  ESL_SQ     *sq      = esl_sq_Create();
  ESL_SQ     *shuff   = esl_sq_Create();
  char       *targ    = NULL;
  int         N       = esl_opt_GetInteger(go, "-N");
  int         L       = esl_opt_GetInteger(go, "-L"); /* L>0 means select random fixed-len subseqs */
  int         kmers   = 0;
  int         i;
  int         status;

  if (esl_opt_GetString(go, "--informat") != NULL) {
    infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
    /* The message has a %s conversion, so the offending format name must be passed. */
    if (infmt == eslSQFILE_UNKNOWN)
      esl_fatal("%s is not a valid input sequence file format for --informat", esl_opt_GetString(go, "--informat"));
  }

  if (esl_opt_IsOn(go, "-k")) kmers = esl_opt_GetInteger(go, "-k");

  status = esl_sqfile_Open(seqfile, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) esl_fatal("No such file %s", seqfile);
  else if (status == eslEFORMAT)   esl_fatal("Format of seqfile %s unrecognized.", seqfile);
  else if (status == eslEINVAL)    esl_fatal("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        esl_fatal("Open failed, code %d.", status);

  if (L > 0)
    {
      esl_sq_GrowTo(shuff, L);
      shuff->n = L;
      ESL_ALLOC(targ, sizeof(char) * (L+1));
    }

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      if (L == 0) {                     /* shuffling entire sequence   */
        esl_sq_GrowTo(shuff, sq->n);    /* make sure shuff can hold sq */
        shuff->n = sq->n;
        targ = sq->seq;
      } else if (sq->n < L) {           /* reject seqs < L long */
        /* Reset <sq> before skipping, exactly as the bottom of the loop does
         * for accepted sequences; otherwise the next read starts from an
         * object that still holds this rejected sequence.
         */
        esl_sq_Reuse(sq);
        continue;
      }

      for (i = 0; i < N; i++)
        {
          if (L > 0) {                  /* fixed-len mode: copy a random subseq */
            int pos = esl_rnd_Roll(r, sq->n - L + 1);
            strncpy(targ, sq->seq + pos, L);
            targ[L] = '\0';
          }

          /* Do the requested kind of shuffling */
          if      (esl_opt_GetBoolean(go, "-m")) esl_rsq_CShuffle     (r, targ,        shuff->seq); /* monoresidue shuffling */
          else if (esl_opt_GetBoolean(go, "-d")) esl_rsq_CShuffleDP   (r, targ,        shuff->seq); /* diresidue shuffling   */
          else if (esl_opt_IsOn      (go, "-k")) esl_rsq_CShuffleKmers(r, targ, kmers, shuff->seq); /* k-mer shuffling       */
          else if (esl_opt_GetBoolean(go, "-0")) esl_rsq_CMarkov0     (r, targ,        shuff->seq); /* 0th order Markov      */
          else if (esl_opt_GetBoolean(go, "-1")) esl_rsq_CMarkov1     (r, targ,        shuff->seq); /* 1st order Markov      */
          else if (esl_opt_GetBoolean(go, "-r")) esl_rsq_CReverse     (   targ,        shuff->seq); /* reverse               */
          else if (esl_opt_IsOn      (go, "-w")) {                                                  /* regionally shuffle    */
            int W = esl_opt_GetInteger(go, "-w");
            esl_rsq_CShuffleWindows(r, targ, W, shuff->seq);
          }

          /* Set the name of the shuffled sequence */
          if (N > 1) esl_sq_FormatName(shuff, "%s-shuffled-%d", sq->name, i);
          else       esl_sq_FormatName(shuff, "%s-shuffled", sq->name);

          /* Output the resulting sequence */
          esl_sqio_Write(ofp, shuff, outfmt, FALSE);

          /* don't need to reuse the shuffled sequence: we will use exactly the same memory */
        }
      esl_sq_Reuse(sq);
    }
  if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n",
                                           sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
  else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
                                           status, sqfp->filename);

  /* <targ> is owned by us only in fixed-length mode; in full-length mode it aliases sq->seq */
  if (L > 0) free(targ);
  esl_sq_Destroy(shuff);
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  return eslOK;

 ERROR:
  if (targ != NULL) free(targ);
  esl_sq_Destroy(shuff);
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; P7_OMX *pp = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; double Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); pp = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, fwd, &fsc); p7_Backward(dsq, L, om, fwd, bck, &bsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Decoding(om, fwd, bck, pp); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(fwd); p7_omx_Destroy(bck); p7_omx_Destroy(pp); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go; char *msafile; ESL_MSAFILE *afp; ESL_MSA *msa; int do_gsc; int do_pb; int do_blosum; int maxN; double maxid; int nsmall, nbig; int i; /* Process command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE){ puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } do_blosum = esl_opt_GetBoolean(go, "--blosum"); do_gsc = esl_opt_GetBoolean(go, "--gsc"); do_pb = esl_opt_GetBoolean(go, "--pb"); maxid = esl_opt_GetReal (go, "--id"); maxN = esl_opt_GetInteger(go, "--maxN"); if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } if ((msafile = esl_opt_GetArg(go, 1)) == NULL) esl_fatal("%s", go->errbuf); esl_getopts_Destroy(go); /* Weight one or more alignments from input file */ esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp); while (esl_msa_Read(afp, &msa) == eslOK) { if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; } if (do_gsc) esl_msaweight_GSC(msa); else if (do_pb) esl_msaweight_PB(msa); else if (do_blosum) esl_msaweight_BLOSUM(msa, maxid); for (nsmall = 0, nbig = 0, i = 0; i < msa->nseq; i++) { if (msa->wgt[i] < 0.2) nsmall++; if (msa->wgt[i] > 5.0) nbig++; } printf("%-20s %5d %5d %8.4f %8.4f %5d %5d\n", msa->name, msa->nseq, msa->alen, esl_vec_DMin(msa->wgt, msa->nseq), esl_vec_DMax(msa->wgt, msa->nseq), nsmall, nbig); esl_msa_Destroy(msa); } esl_msafile_Close(afp); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *fwd = NULL; P7_GMX *bck = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc; double base_time, bench_time, Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); fwd = p7_gmx_Create(gm->M, L); bck = p7_gmx_Create(gm->M, L); /* Baseline time. */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; /* Benchmark time. */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); if (! esl_opt_GetBoolean(go, "-B")) p7_GForward (dsq, L, gm, fwd, &sc); if (! esl_opt_GetBoolean(go, "-F")) p7_GBackward(dsq, L, gm, bck, NULL); } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_gmx_Destroy(bck); p7_gmx_Destroy(fwd); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* main()
 *
 * Converts an HMM file into a galosh profile file.
 *
 * Takes two command line arguments: <input hmmfile> and <output hmmfile>.
 * At most one HMM is read from the input (a single read, not a loop). If
 * its alphabet is DNA or amino it is converted with
 * convert_to_galosh_profile() and streamed to the output file, after which
 * one line of summary statistics is printed to stdout. Any other alphabet
 * raises an eslEUNIMPLEMENTED exception.
 *
 * Exits with status 0 on success, 1 on command line errors; other failures
 * are reported through p7_Fail() / esl_fatal().
 */
int
main(int argc, char **argv)
{
  int           status;
  ESL_GETOPTS  *go         = NULL;   /* command line processing */
  ESL_ALPHABET *abc        = NULL;
  char         *hmmfile    = NULL;
  char         *outhmmfile = NULL;
  P7_HMMFILE   *hfp        = NULL;
  P7_HMM       *hmm        = NULL;
  P7_BG        *bg         = NULL;
  int           n_models;            /* number of HMMs read so far            */
  double        pos_relent;          /* from p7_MeanPositionRelativeEntropy() */
  float         comp_kl;             /* from p7_hmm_CompositionKLDist()       */
  char          errmsg[eslERRBUFSIZE];
  const char   *complaint  = NULL;   /* first argument problem found, if any  */

  /* Process the command line options. */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK ||
      esl_opt_VerifyConfig(go)               != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  if (esl_opt_GetBoolean(go, "-h") == TRUE)
    {
      profillic_p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);
      puts("\nwhere options are:");
      esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/
      exit(0);
    }

  /* Validate the two positional arguments; the first problem found wins. */
  if (esl_opt_ArgNumber(go) != 2)
    complaint = "Incorrect number of command line arguments.";
  else if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL)
    complaint = "Failed to read <input hmmfile> argument from command line.";
  else if ((outhmmfile = esl_opt_GetArg(go, 2)) == NULL)
    complaint = "Failed to read <output hmmfile> argument from command line.";

  if (complaint != NULL)
    {
      puts(complaint);
      esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  profillic_p7_banner(stdout, argv[0], banner);

  /* Initializations: open the input HMM file for reading */
  status = p7_hmmfile_Open(hmmfile, NULL, &hfp);
  switch (status)
    {
    case eslOK:
      break;
    case eslENOTFOUND:
      p7_Fail("Failed to open HMM file %s for reading.\n", hmmfile);
      break;
    case eslEFORMAT:
      p7_Fail("File %s does not appear to be in a recognized HMM format.\n", hmmfile);
      break;
    default:
      p7_Fail("Unexpected error %d in opening HMM file %s.\n", status, hmmfile);
      break;
    }

  /* Main body: read one HMM, convert it, print one line of stats */
  printf("#\n");
  printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n",
         "idx", "name", "accession", "nseq", "eff_nseq", "M", "relent", "info", "p relE", "compKL");
  printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n",
         "----", "--------------------", "------------", "--------", "--------",
         "------", "------", "------", "------", "------");

  n_models = 0;
  status   = p7_hmmfile_Read(hfp, &abc, &hmm);
  if (status != eslEOF)
    {
      switch (status)
        {
        case eslOK:
          break;
        case eslEOD:
          esl_fatal("read failed, HMM file %s may be truncated?", hmmfile);
          break;
        case eslEFORMAT:
          esl_fatal("bad file format in HMM file %s", hmmfile);
          break;
        case eslEINCOMPAT:
          esl_fatal("HMM file %s contains different alphabets", hmmfile);
          break;
        default:
          esl_fatal("Unexpected error in reading HMMs from %s", hmmfile);
          break;
        }
      n_models++;

      if (bg == NULL) bg = p7_bg_Create(abc);

      /* Convert and write; the profile type depends on the alphabet. */
      if (abc->type == eslDNA)
        {
          galosh::ProfileTreeRoot<seqan::Dna, floatrealspace> profile;

          status = convert_to_galosh_profile( hmm, profile );
          if (status != eslOK)
            esl_fatal("Unexpected error in converting HMM from file %s to a dna galosh profile", hmmfile);

          std::ofstream fs ( outhmmfile );
          if (fs.is_open())
            {
              fs << profile;
              fs.close();
            }
          else
            {
              esl_fatal("Unexpected error in opening the file %s for writing", outhmmfile);
            }
        }
      else if (abc->type == eslAMINO)
        {
          galosh::ProfileTreeRoot<seqan::AminoAcid20, floatrealspace> profile;

          status = convert_to_galosh_profile( hmm, profile );
          if (status != eslOK)
            esl_fatal("Unexpected error in converting HMM from file %s to an amino galosh profile", hmmfile);

          std::ofstream fs ( outhmmfile );
          if (fs.is_open())
            {
              fs << profile;
              fs.close();
            }
          else
            {
              esl_fatal("Unexpected error in opening the file %s for writing", outhmmfile);
            }
        }
      else
        {
          ESL_EXCEPTION(eslEUNIMPLEMENTED, "Sorry, at present the profillic-hmmtoprofile software can only handle amino and dna.");
        }

      /* One line of summary statistics for the model just converted */
      p7_MeanPositionRelativeEntropy(hmm, bg, &pos_relent);
      p7_hmm_CompositionKLDist(hmm, bg, &comp_kl, NULL);

      printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f\n",
             n_models,
             hmm->name,
             hmm->acc == NULL ? "-" : hmm->acc,
             hmm->nseq,
             hmm->eff_nseq,
             hmm->M,
             p7_MeanMatchRelativeEntropy(hmm, bg),
             p7_MeanMatchInfo(hmm, bg),
             pos_relent,
             comp_kl);

      /* p7_MeanForwardScore(hmm, bg)); */

      p7_hmm_Destroy(hmm);
    }

  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  p7_hmmfile_Close(hfp);
  esl_getopts_Destroy(go);
  exit(0);
}
/* main()
 *
 * Example driver: reads one HMM (argument 1) and one sequence (argument 2),
 * configures a p7_UNILOCAL profile for that sequence's length, runs
 * p7_GForward() and p7_GBackward(), and prints the two scores (labelled
 * in nats). Returns 0; failures are reported through p7_Fail().
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go       = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmm_path = esl_opt_GetArg(go, 1);
  char         *seq_path = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc      = NULL;
  P7_HMMFILE   *hmm_fp   = NULL;
  P7_HMM       *hmm      = NULL;
  P7_BG        *null_mdl = NULL;
  P7_PROFILE   *profile  = NULL;
  P7_GMX       *fwd_mx   = NULL;
  P7_GMX       *bck_mx   = NULL;
  ESL_SQ       *sq       = NULL;
  ESL_SQFILE   *seq_fp   = NULL;
  int           seq_fmt  = eslSQFILE_UNKNOWN;
  float         fwd_sc;
  float         bck_sc;
  int           rc;

  /* --- one HMM --- */
  if (p7_hmmfile_Open(hmm_path, NULL, &hmm_fp) != eslOK)
    p7_Fail("Failed to open HMM file %s", hmm_path);
  if (p7_hmmfile_Read(hmm_fp, &abc, &hmm) != eslOK)
    p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hmm_fp);

  /* --- one sequence, in the HMM's alphabet --- */
  sq = esl_sq_CreateDigital(abc);
  rc = esl_sqfile_Open(seq_path, seq_fmt, NULL, &seq_fp);
  switch (rc)
    {
    case eslOK:        break;
    case eslENOTFOUND: p7_Fail("No such file.");                  break;
    case eslEFORMAT:   p7_Fail("Format unrecognized.");           break;
    case eslEINVAL:    p7_Fail("Can't autodetect stdin or .gz."); break;
    default:           p7_Fail("Open failed, code %d.", rc);      break;
    }
  if (esl_sqio_Read(seq_fp, sq) != eslOK)
    p7_Fail("Failed to read sequence");
  esl_sqfile_Close(seq_fp);

  /* --- null model and profile, both set for this sequence's length --- */
  null_mdl = p7_bg_Create(abc);
  p7_bg_SetLength(null_mdl, sq->n);
  profile = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, null_mdl, profile, sq->n, p7_UNILOCAL);

  /* --- DP matrices --- */
  fwd_mx = p7_gmx_Create(profile->M, sq->n);
  bck_mx = p7_gmx_Create(profile->M, sq->n);

  /* --- Forward, then Backward --- */
  p7_GForward (sq->dsq, sq->n, profile, fwd_mx, &fwd_sc);
  p7_GBackward(sq->dsq, sq->n, profile, bck_mx, &bck_sc);

  printf("fwd = %.4f nats\n", fwd_sc);
  printf("bck = %.4f nats\n", bck_sc);

  /* --- release everything --- */
  esl_sq_Destroy(sq);
  p7_gmx_Destroy(fwd_mx);
  p7_gmx_Destroy(bck_mx);
  p7_profile_Destroy(profile);
  p7_bg_Destroy(null_mdl);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
/* main()
 *
 * Example driver: reads one HMM (argument 1) and scores every sequence in
 * the sequence file (argument 2) with p7_GForward() and p7_GBackward().
 * The profile mode is picked by the first option set among --fs, --sw,
 * --ls, --s. For each sequence the Forward matrix is dumped to stdout,
 * followed by one table row holding the two raw scores and the same scores
 * after subtracting the p7_bg_NullOne() score and dividing by
 * eslCONST_LOG2. Returns 0; failures are reported through p7_Fail().
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go       = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmm_path = esl_opt_GetArg(go, 1);
  char         *seq_path = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc      = NULL;
  P7_HMMFILE   *hmm_fp   = NULL;
  P7_HMM       *hmm      = NULL;
  P7_BG        *null_mdl = NULL;
  P7_PROFILE   *profile  = NULL;
  P7_GMX       *fwd_mx   = NULL;
  P7_GMX       *bck_mx   = NULL;
  ESL_SQ       *sq       = NULL;
  ESL_SQFILE   *seq_fp   = NULL;
  int           seq_fmt  = eslSQFILE_UNKNOWN;
  float         fwd_sc;
  float         bck_sc;
  float         null_sc;
  int           rc;

  /* Initialize log-sum calculator */
  p7_FLogsumInit();

  /* --- one HMM --- */
  if (p7_hmmfile_OpenE(hmm_path, NULL, &hmm_fp, NULL) != eslOK)
    p7_Fail("Failed to open HMM file %s", hmm_path);
  if (p7_hmmfile_Read(hmm_fp, &abc, &hmm) != eslOK)
    p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hmm_fp);

  /* --- open the sequence file; sequences are read in the loop below --- */
  sq = esl_sq_CreateDigital(abc);
  rc = esl_sqfile_Open(seq_path, seq_fmt, NULL, &seq_fp);
  switch (rc)
    {
    case eslOK:        break;
    case eslENOTFOUND: p7_Fail("No such file.");                  break;
    case eslEFORMAT:   p7_Fail("Format unrecognized.");           break;
    case eslEINVAL:    p7_Fail("Can't autodetect stdin or .gz."); break;
    default:           p7_Fail("Open failed, code %d.", rc);      break;
    }

  /* --- null model and profile --- */
  null_mdl = p7_bg_Create(abc);
  profile  = p7_profile_Create(hmm->M, abc);

  /* Configure the profile in the requested mode. No sequence has been read
   * yet, so sq->n here is whatever esl_sq_CreateDigital() left it at; the
   * length is set again per sequence inside the loop.
   */
  if      (esl_opt_GetBoolean(go, "--fs")) p7_ProfileConfig(hmm, null_mdl, profile, sq->n, p7_LOCAL);
  else if (esl_opt_GetBoolean(go, "--sw")) p7_ProfileConfig(hmm, null_mdl, profile, sq->n, p7_UNILOCAL);
  else if (esl_opt_GetBoolean(go, "--ls")) p7_ProfileConfig(hmm, null_mdl, profile, sq->n, p7_GLOCAL);
  else if (esl_opt_GetBoolean(go, "--s"))  p7_ProfileConfig(hmm, null_mdl, profile, sq->n, p7_UNIGLOCAL);

  /* --- DP matrices (grown per sequence below) --- */
  fwd_mx = p7_gmx_Create(profile->M, sq->n);
  bck_mx = p7_gmx_Create(profile->M, sq->n);

  /* --- table header --- */
  printf("%-30s   %-10s %-10s   %-10s %-10s\n", "# seq name", "fwd (raw)", "bck (raw) ", "fwd (bits)", "bck (bits)");
  printf("%-30s   %10s %10s   %10s %10s\n", "#--------------", "----------", "----------", "----------", "----------");

  for (;;)
    {
      rc = esl_sqio_Read(seq_fp, sq);
      if (rc == eslEOF) break;

      if (rc == eslEFORMAT)
        p7_Fail("Parse failed (sequence file %s)\n%s\n", seq_fp->filename, seq_fp->get_error(seq_fp));
      else if (rc != eslOK)
        p7_Fail("Unexpected error %d reading sequence file %s", rc, seq_fp->filename);

      /* Resize the DP matrices if necessary */
      p7_gmx_GrowTo(fwd_mx, profile->M, sq->n);
      p7_gmx_GrowTo(bck_mx, profile->M, sq->n);

      /* Set the profile and null model's target length models */
      p7_bg_SetLength(null_mdl, sq->n);
      p7_ReconfigLength(profile, sq->n);

      /* Run Forward, Backward */
      p7_GForward (sq->dsq, sq->n, profile, fwd_mx, &fwd_sc);
      p7_GBackward(sq->dsq, sq->n, profile, bck_mx, &bck_sc);

      p7_gmx_Dump(stdout, fwd_mx, p7_DEFAULT);

      /* Those scores are partial log-odds likelihoods in nats.
       * Subtract off the rest of the null model, convert to bits.
       */
      p7_bg_NullOne(null_mdl, sq->dsq, sq->n, &null_sc);

      printf("%-30s   %10.4f %10.4f   %10.4f %10.4f\n",
             sq->name,
             fwd_sc,
             bck_sc,
             (fwd_sc - null_sc) / eslCONST_LOG2,
             (bck_sc - null_sc) / eslCONST_LOG2);

      p7_gmx_Reuse(fwd_mx);
      p7_gmx_Reuse(bck_mx);
      esl_sq_Reuse(sq);
    }

  /* --- release everything --- */
  esl_sqfile_Close(seq_fp);
  esl_sq_Destroy(sq);
  p7_gmx_Destroy(fwd_mx);
  p7_gmx_Destroy(bck_mx);
  p7_profile_Destroy(profile);
  p7_bg_Destroy(null_mdl);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* application configuration */ ESL_ALPHABET *abc = NULL; /* biological alphabet */ char *alifile = NULL; /* alignment file name */ int fmt = eslMSAFILE_UNKNOWN; /* format code for alifile */ ESLX_MSAFILE *afp = NULL; /* open msa file */ ESL_MSAFILE2 *old_afp = NULL; /* open msa file, legacy (--small) */ ESL_MSA *msa = NULL; /* one multiple sequence alignment */ int nali; /* number of alignments read */ int i; /* counter over seqs */ int64_t alen; /* alignment length */ int nseq; /* number of sequences in the msa */ int64_t rlen; /* a raw (unaligned) seq length */ int64_t small, large; /* smallest, largest sequence */ int64_t nres; /* total # of residues in msa */ double avgid; /* average fractional pair id */ int max_comparisons; /* maximum # comparisons for avg id */ int do_stall; /* used to stall when debugging */ double **abc_ct = NULL; /* [0..msa->alen-1][0..abc->K] number of each residue at each position (abc->K is gap) */ double ***bp_ct = NULL; /* [0..msa->alen-1][0..abc->Kp-1][0..abc->Kp-1] per (non-pknotted) consensus basepair * * count of each possible basepair over all seqs basepairs are indexed by 'i' the minimum * * of 'i:j' for a pair between i and j, where i < j. 
*/ double **pp_ct = NULL; /* [0..msa->alen-1][0..11], count of each posterior probability (PP) code, over all sequences, gap is 11 */ int *i_am_rf = NULL; /* [0..i..msa->alen-1]: TRUE if pos i is non-gap RF posn, if msa->rf == NULL remains NULL */ int *rf2a_map = NULL; /* [0..rfpos..rflen-1] = apos, * apos is the alignment position (0..msa->alen-1) that * is non-gap RF position rfpos+1 (for rfpos in 0..rflen-1) */ int rflen = -1; /* nongap RF length */ char errbuf[eslERRBUFSIZE]; int status; /* easel return code */ /* optional output files */ FILE *iinfofp = NULL; /* output file for --iinfo */ FILE *pcinfofp = NULL; /* output file for --pcinfo */ FILE *psinfofp = NULL; /* output file for --psinfo */ FILE *rinfofp = NULL; /* output file for --rinfo */ FILE *icinfofp = NULL; /* output file for --icinfo */ FILE *listfp = NULL; /* output file for --list */ FILE *cinfofp = NULL; /* output file for --cinfo */ FILE *bpinfofp = NULL; /* output file for --bpinfo */ int use_weights; /* TRUE if --weight, reported weighted counts (using msa->wgt) to all output files */ int weights_exist; /* TRUE if at least one msa->wgt value differs from 1.0, FALSE if not (or if msa->wgt==NULL) */ /*********************************************** * Parse command line ***********************************************/ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) { printf("Failed to parse command line: %s\n", go->errbuf); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_GetBoolean(go, "-h") ) { esl_banner(stdout, argv[0], banner); esl_usage (stdout, argv[0], usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 1, 2, 80); puts("\n small memory mode, requires --amino,--dna, or --rna and --informat pfam:"); esl_opt_DisplayHelp(stdout, go, 2, 2, 80); puts("\n optional output files:"); esl_opt_DisplayHelp(stdout, go, 3, 2, 
80); exit(0); } if (esl_opt_ArgNumber(go) != 1) { printf("Incorrect number of command line arguments.\n"); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } alifile = esl_opt_GetArg(go, 1); if (esl_opt_IsOn(go, "--informat") && (fmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid input sequence file format for --informat", esl_opt_GetString(go, "--informat")); if (esl_opt_GetBoolean(go, "--small") && fmt != eslMSAFILE_PFAM) esl_fatal("--small requires --informat pfam\n"); max_comparisons = 1000; do_stall = esl_opt_GetBoolean(go, "--stall"); /* a stall point for attaching gdb */ while (do_stall); /*********************************************** * Open the MSA file; determine alphabet; set for digital input ***********************************************/ if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); /* We'd like to get rid of the legacy msafile interface, but it * includes small memory functionality for Pfam format which we have * to replace first. For now, use both interfaces, new and legacy */ if ( esl_opt_GetBoolean(go, "--small") ) { if (! 
abc) esl_fatal("--small requires one of --amino, --dna, --rna be specified."); status = esl_msafile2_OpenDigital(abc, alifile, NULL, &old_afp); if (status == eslENOTFOUND) esl_fatal("Alignment file %s doesn't exist or is not readable\n", alifile); else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of alignment %s\n", alifile); else if (status != eslOK) esl_fatal("Alignment file open failed with error %d\n", status); } else { if ( (status = eslx_msafile_Open(&abc, alifile, NULL, fmt, NULL, &afp)) != eslOK) eslx_msafile_OpenFailure(afp, status); } /************************************** * Open optional output files, as nec * **************************************/ /* determine name for first list file, if nec */ if( esl_opt_IsOn(go, "--list")) { if ((listfp = fopen(esl_opt_GetString(go, "--list"), "w")) == NULL) esl_fatal("Failed to open --list output file %s\n", esl_opt_GetString(go, "--list")); } if( esl_opt_IsOn(go, "--icinfo")) { if ((icinfofp = fopen(esl_opt_GetString(go, "--icinfo"), "w")) == NULL) esl_fatal("Failed to open --icinfo output file %s\n", esl_opt_GetString(go, "--icinfo")); } if( esl_opt_IsOn(go, "--rinfo")) { if ((rinfofp = fopen(esl_opt_GetString(go, "--rinfo"), "w")) == NULL) esl_fatal("Failed to open --rinfo output file %s\n", esl_opt_GetString(go, "--rinfo")); } if( esl_opt_IsOn(go, "--pcinfo")) { if ((pcinfofp = fopen(esl_opt_GetString(go, "--pcinfo"), "w")) == NULL) esl_fatal("Failed to open --pcinfo output file %s\n", esl_opt_GetString(go, "--pcinfo")); } if( esl_opt_IsOn(go, "--psinfo")) { if ((psinfofp = fopen(esl_opt_GetString(go, "--psinfo"), "w")) == NULL) esl_fatal("Failed to open --psinfo output file %s\n", esl_opt_GetString(go, "--psinfo")); } if( esl_opt_IsOn(go, "--iinfo")) { if ((iinfofp = fopen(esl_opt_GetString(go, "--iinfo"), "w")) == NULL) esl_fatal("Failed to open --iinfo output file %s\n", esl_opt_GetString(go, "--iinfo")); } if( esl_opt_IsOn(go, "--cinfo")) { if ((cinfofp = fopen(esl_opt_GetString(go, 
"--cinfo"), "w")) == NULL) esl_fatal("Failed to open --cinfo output file %s\n", esl_opt_GetString(go, "--cinfo")); } if( esl_opt_IsOn(go, "--bpinfo")) { if ((bpinfofp = fopen(esl_opt_GetString(go, "--bpinfo"), "w")) == NULL) esl_fatal("Failed to open --bpinfo output file %s\n", esl_opt_GetString(go, "--bpinfo")); } /*********************************************** * Read MSAs one at a time. ***********************************************/ if (esl_opt_GetBoolean(go, "-1")) { puts("#"); if(! esl_opt_GetBoolean(go, "--small")) { printf("# %-4s %-20s %10s %7s %7s %12s %6s %6s %10s %3s\n", "idx", "name", "format", "nseq", "alen", "nres", "small", "large", "avlen", "%id"); printf("# %-4s %-20s %10s %7s %7s %12s %6s %6s %10s %3s\n", "----", "--------------------", "----------", "-------", "-------", "------------", "------", "------", "----------", "---"); } else { printf("# %-4s %-20s %10s %7s %7s %12s %10s\n", "idx", "name", "format", "nseq", "alen", "nres", "avlen"); printf("# %-4s %-20s %10s %7s %7s %12s %10s\n", "----", "--------------------", "----------", "-------", "-------", "------------", "----------"); } } nali = 0; fmt = (esl_opt_GetBoolean(go, "--small") ? old_afp->format : afp->format); while ( (status = ( esl_opt_GetBoolean(go, "--small") ? esl_msafile2_ReadInfoPfam(old_afp, listfp, abc, -1, NULL, NULL, &msa, &nseq, &alen, NULL, NULL, NULL, NULL, NULL, &abc_ct, &pp_ct, NULL, NULL, NULL) : eslx_msafile_Read (afp, &msa))) == eslOK) { nali++; nres = 0; if (! 
esl_opt_GetBoolean(go, "--small")) { nseq = msa->nseq; alen = msa->alen; small = large = -1; for (i = 0; i < msa->nseq; i++) { rlen = esl_abc_dsqrlen(msa->abc, msa->ax[i]); nres += rlen; if (small == -1 || rlen < small) small = rlen; if (large == -1 || rlen > large) large = rlen; } esl_dst_XAverageId(abc, msa->ax, msa->nseq, max_comparisons, &avgid); } else { /* --small invoked */ for(i = 0; i < alen; i++) nres += (int) esl_vec_DSum(abc_ct[i], abc->K); } if (esl_opt_GetBoolean(go, "-1")) { printf("%-6d %-20s %10s %7d %7" PRId64 " %12" PRId64, nali, msa->name, eslx_msafile_DecodeFormat(fmt), nseq, alen, nres); if (! esl_opt_GetBoolean(go, "--small")) { printf(" %6" PRId64 " %6" PRId64 " %10.1f %3.0f\n", small, large, (double) nres / (double) msa->nseq, 100.*avgid); } else { printf(" %10.1f\n", (double) nres / (double) nseq); } } else { printf("Alignment number: %d\n", nali); if (msa->name != NULL) printf("Alignment name: %s\n", msa->name); printf("Format: %s\n", eslx_msafile_DecodeFormat(fmt)); printf("Number of sequences: %d\n", nseq); printf("Alignment length: %" PRId64 "\n", alen); printf("Total # residues: %" PRId64 "\n", nres); if(! esl_opt_GetBoolean(go, "--small")) { printf("Smallest: %" PRId64 "\n", small); printf("Largest: %" PRId64 "\n", large); } printf("Average length: %.1f\n", (double) nres / (double) nseq); if(! esl_opt_GetBoolean(go, "--small")) { printf("Average identity: %.0f%%\n", 100.*avgid); } printf("//\n"); } /* Dump data to optional output files, if nec */ if(esl_opt_IsOn(go, "--list")) { if(! esl_opt_GetBoolean(go, "--small")) { /* only print sequence name to list file if ! 
--small, else we already have in esl_msafile2_ReadInfoPfam() */ for(i = 0; i < msa->nseq; i++) fprintf(listfp, "%s\n", msa->sqname[i]); } } /* if RF exists, get i_am_rf array[0..alen] which tells us which positions are non-gap RF positions * and rf2a_map, a map of non-gap RF positions to overall alignment positions */ if(msa->rf != NULL) { if((status = map_rfpos_to_apos(msa, abc, errbuf, alen, &i_am_rf, &rf2a_map, &rflen)) != eslOK) esl_fatal(errbuf); } else i_am_rf = NULL; weights_exist = check_msa_weights(msa); use_weights = (weights_exist && esl_opt_GetBoolean(go, "--weight")) ? TRUE : FALSE; if( (! esl_opt_GetBoolean(go, "--small")) && (esl_opt_IsOn(go, "--icinfo") || esl_opt_IsOn(go, "--rinfo") || esl_opt_IsOn(go, "--pcinfo") || esl_opt_IsOn(go, "--cinfo") || esl_opt_IsOn(go, "--bpinfo"))) { /* collect counts of each residue and PPs (if they exist) from the msa */ if(esl_opt_GetBoolean(go, "--weight") && msa->wgt == NULL) esl_fatal("--weight requires all alignments have #=GS WT annotation, but aln %d does not", nali); if((status = count_msa(msa, errbuf, nali, esl_opt_GetBoolean(go, "--noambig"), /* ignore ambiguous residues? */ esl_opt_GetBoolean(go, "--weight"), /* use msa->wgt sequence weights? */ &abc_ct, ((bpinfofp != NULL && msa->ss_cons != NULL) ? &bp_ct : NULL), /* get basepair counts? */ (msa->pp != NULL ? &pp_ct : NULL))) /* get PP counts? 
*/ != eslOK) esl_fatal(errbuf); } if( esl_opt_IsOn(go, "--icinfo")) { if((status = dump_infocontent_info(icinfofp, abc, abc_ct, use_weights, nali, alen, nseq, i_am_rf, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } if( esl_opt_IsOn(go, "--rinfo")) { if((status = dump_residue_info(rinfofp, abc, abc_ct, use_weights, nali, alen, nseq, i_am_rf, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } if(esl_opt_IsOn(go, "--pcinfo")) { if(pp_ct == NULL) esl_fatal("Error: --pcinfo requires all alignments have #=GR PP annotation, but alignment %d does not", nali); if((status = dump_posterior_column_info(pcinfofp, pp_ct, use_weights, nali, alen, nseq, i_am_rf, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } if(esl_opt_IsOn(go, "--psinfo")) { if(msa->pp == NULL) esl_fatal("Error: --psinfo requires all alignments have #=GR PP annotation, but alignment %d does not", nali); if((status = dump_posterior_sequence_info(psinfofp, msa, nali, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } if( esl_opt_IsOn(go, "--iinfo")) { if(msa->rf == NULL) esl_fatal("--iinfo requires all alignments have #=GC RF annotation, but alignment %d does not", nali); if(esl_opt_GetBoolean(go, "--weight") && msa->wgt == NULL) esl_fatal("--weight requires all alignments have #=GS WT annotation, but aln %d does not", nali); if((status = dump_insert_info(iinfofp, msa, use_weights, nali, i_am_rf, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } if( esl_opt_IsOn(go, "--cinfo")) { if((status = dump_column_residue_counts(cinfofp, abc, abc_ct, esl_opt_GetBoolean(go, "--noambig"), use_weights, nali, alen, nseq, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } if( esl_opt_IsOn(go, "--bpinfo")) { if(msa->ss_cons == NULL) esl_fatal("--bpinfo requires all alignments have #=GC SS_cons annotation, but alignment %d does not", nali); if((status = dump_basepair_counts(bpinfofp, msa, abc, bp_ct, use_weights, nali, nseq, msa->name, alifile, errbuf) != eslOK)) esl_fatal(errbuf); } 
esl_msa_Destroy(msa); if(abc_ct != NULL) { esl_Free2D((void **) abc_ct, alen); abc_ct = NULL; } if(bp_ct != NULL) { esl_Free3D((void ***) bp_ct, alen, abc->Kp); bp_ct = NULL; } if(pp_ct != NULL) { esl_Free2D((void **) pp_ct, alen); pp_ct = NULL; } if(i_am_rf != NULL) { free(i_am_rf); i_am_rf = NULL; } if(rf2a_map != NULL) { free(rf2a_map); rf2a_map = NULL; } } /* If an msa read failed, we've dropped out to here with an informative status code. * we have to handle failures from new vs. legacy msa parsing differently */ if (esl_opt_GetBoolean(go, "--small")) { if (status == eslEFORMAT) esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nOffending line is:\n%s\n", old_afp->linenumber, old_afp->fname, old_afp->errbuf, old_afp->buf); else if (status != eslEOF) esl_fatal("Alignment file read failed with error code %d\n", status); else if (nali == 0) esl_fatal("No alignments found in file %s\n", alifile); } else { if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); } /* Cleanup, normal return */ if(listfp != NULL) { fclose(listfp); printf("# List of sequences in %d alignment(s) saved to file %s\n", nali, esl_opt_GetString(go, "--list")); } if(icinfofp != NULL) { fclose(icinfofp); printf("# Information content data saved to file %s.\n", esl_opt_GetString(go, "--icinfo")); } if(rinfofp != NULL) { fclose(rinfofp); printf("# Residue data saved to file %s.\n", esl_opt_GetString(go, "--rinfo")); } if(pcinfofp != NULL) { fclose(pcinfofp); printf("# Per-column posterior probability data saved to file %s.\n", esl_opt_GetString(go, "--pcinfo")); } if(psinfofp != NULL) { fclose(psinfofp); printf("# Per-sequence posterior probability data saved to file %s.\n", esl_opt_GetString(go, "--psinfo")); } if(iinfofp != NULL) { printf("# Insert data saved to file %s.\n", esl_opt_GetString(go, "--iinfo")); fclose(iinfofp); } if(cinfofp != NULL) { printf("# Per-column counts data saved to file %s.\n", esl_opt_GetString(go, "--cinfo")); fclose(cinfofp); } 
if(bpinfofp != NULL) { printf("# Per-column basepair counts data saved to file %s.\n", esl_opt_GetString(go, "--bpinfo")); fclose(bpinfofp); } if (afp) eslx_msafile_Close(afp); if (old_afp) esl_msafile2_Close(old_afp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc1, sc2; double base_time, bench_time, Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x")) p7_profile_SameAsMF(om, gm); ox = p7_omx_Create(gm->M, 0, 0); gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_MSVFilter (dsq, L, om, ox, &sc1); /* -c option: compare generic to fast score */ if (esl_opt_GetBoolean(go, "-c")) { p7_GMSV (dsq, L, gm, gx, 2.0, &sc2); printf("%.4f %.4f\n", sc1, sc2); } /* -x option: compare generic to fast score in a way that should give exactly the same result */ if (esl_opt_GetBoolean(go, "-x")) { p7_GViterbi(dsq, L, gm, gx, &sc2); sc2 /= om->scale_b; if (om->mode == p7_UNILOCAL) sc2 -= 2.0; /* that's ~ L \log \frac{L}{L+2}, for our NN,CC,JJ */ else if (om->mode == p7_LOCAL) sc2 -= 3.0; /* that's ~ L 
\log \frac{L}{L+3}, for our NN,CC,JJ */ printf("%.4f %.4f\n", sc1, sc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* Example driver: sample tracebacks with p7_StochasticTrace().
 *
 * Takes an HMM file and a sequence file on the command line, reads one HMM
 * and one sequence, and computes a p7_GViterbi() score with the generic
 * profile and a p7_Forward() score with the optimized profile. It then
 * draws -N traces from the Forward matrix; each trace is scored with
 * p7_trace_Score(), optionally dumped (-t), validated, and its score
 * printed. The Forward and Viterbi scores are printed last.
 *
 * Other options: -p dumps the optimized profile, -m turns on matrix
 * dumping for the Forward matrix.
 *
 * Exits through p7_Fail() on any input problem and through p7_Die() if a
 * sampled trace fails p7_trace_Validate(). Returns 0 otherwise.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go         = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char           *hmmfile    = esl_opt_GetArg(go, 1);
  char           *seqfile    = esl_opt_GetArg(go, 2);
  ESL_ALPHABET   *abc        = NULL;
  ESL_RANDOMNESS *rng        = esl_randomness_CreateFast(0);
  P7_HMMFILE     *hfp        = NULL;
  P7_HMM         *hmm        = NULL;
  P7_BG          *bg         = NULL;
  P7_PROFILE     *gm         = NULL;
  P7_OPROFILE    *om         = NULL;
  P7_GMX         *gx         = NULL;
  P7_OMX         *fwd        = NULL;
  P7_TRACE       *tr         = NULL;
  ESL_SQ         *sq         = NULL;
  ESL_SQFILE     *sqfp       = NULL;
  int             seq_format = eslSQFILE_UNKNOWN;
  int             nsamples   = esl_opt_GetInteger(go, "-N");
  int             which;
  float           viterbi_sc;
  float           forward_sc;
  float           trace_sc;
  char            errbuf[eslERRBUFSIZE];
  int             open_status;

  /* One HMM */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK)
    p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK)
    p7_Fail("Failed to read HMM");

  /* One sequence */
  sq          = esl_sq_CreateDigital(abc);
  open_status = esl_sqfile_Open(seqfile, seq_format, NULL, &sqfp);
  if (open_status != eslOK)
    {
      if      (open_status == eslENOTFOUND) p7_Fail("No such file.");
      else if (open_status == eslEFORMAT)   p7_Fail("Format unrecognized.");
      else if (open_status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
      else                                  p7_Fail("Open failed, code %d.", open_status);
    }
  if (esl_sqio_Read(sqfp, sq) != eslOK)
    p7_Fail("Failed to read sequence");

  /* Null model, generic profile, optimized profile; all sized for this sequence */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);

  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);

  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  if (esl_opt_GetBoolean(go, "-p"))
    p7_oprofile_Dump(stdout, om);

  /* DP matrices and the reusable trace */
  fwd = p7_omx_Create(gm->M, sq->n, sq->n);
  gx  = p7_gmx_Create(gm->M, sq->n);
  tr  = p7_trace_Create();

  if (esl_opt_GetBoolean(go, "-m") == TRUE)
    p7_omx_SetDumpMode(stdout, fwd, TRUE);

  p7_GViterbi(sq->dsq, sq->n, gm, gx,  &viterbi_sc);
  p7_Forward (sq->dsq, sq->n, om, fwd, &forward_sc);

  /* Draw the requested number of traces from the Forward matrix */
  which = 0;
  while (which < nsamples)
    {
      p7_StochasticTrace(rng, sq->dsq, sq->n, om, fwd, tr);
      p7_trace_Score(tr, sq->dsq, gm, &trace_sc);

      if (esl_opt_GetBoolean(go, "-t") == TRUE)
        p7_trace_Dump(stdout, tr, gm, sq->dsq);

      if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK)
        p7_Die("trace %d fails validation:\n%s\n", which, errbuf);

      printf("Sampled trace:  %.4f nats\n", trace_sc);
      p7_trace_Reuse(tr);
      which++;
    }
  printf("Forward score:  %.4f nats\n", forward_sc);
  printf("Viterbi score:  %.4f nats\n", viterbi_sc);

  /* Release everything in the same order as before */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_trace_Destroy(tr);
  p7_omx_Destroy(fwd);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_randomness_Destroy(rng);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
/* Example driver for p7_MSVFilter().
 *
 * Takes an HMM file and a sequence file on the command line. For every
 * sequence in the file it runs the optimized MSV filter and the generic
 * p7_GMSV() implementation, subtracts the null score from p7_bg_NullOne(),
 * divides by eslCONST_LOG2, and passes each result to esl_gumbel_surv()
 * with the profile's p7_MMU / p7_MLAMBDA parameters.
 *
 * Output format: -1 prints one tabular line per sequence, -P prints
 * "P score seqname hmmname", and the default is a labelled multi-line
 * report.
 *
 * Exits through p7_Fail() if the HMM or sequence file cannot be opened,
 * if the HMM cannot be read, or if the sequence read loop stops on
 * anything other than eslEOF. Returns 0 otherwise.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
  char         *hmmfile = esl_opt_GetArg(go, 1);
  char         *seqfile = esl_opt_GetArg(go, 2);
  ESL_ALPHABET *abc     = NULL;
  P7_HMMFILE   *hfp     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  P7_OPROFILE  *om      = NULL;
  P7_OMX       *ox      = NULL;
  P7_GMX       *gx      = NULL;
  ESL_SQ       *sq      = NULL;
  ESL_SQFILE   *sqfp    = NULL;
  int           format  = eslSQFILE_UNKNOWN;
  float         msvraw, nullsc, msvscore;
  float         graw, gscore;
  double        P, gP;
  int           status;

  /* Read in one HMM */
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

  /* Open sequence file for reading */
  sq     = esl_sq_CreateDigital(abc);
  status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("No such file.");
  else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
  else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
  else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);

  /* create default null model, then create and optimize profile.
   * No sequence has been read yet, so sq->n is whatever esl_sq_CreateDigital()
   * left it at; the length configuration is redone per target in the loop below.
   */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, sq->n);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* allocate DP matrices, both a generic and an optimized one */
  ox = p7_omx_Create(gm->M, 0, 0); /* one row version */
  gx = p7_gmx_Create(gm->M, sq->n);

  /* Useful to place and compile in for debugging:
     p7_oprofile_Dump(stdout, om);          dumps the optimized profile
     p7_omx_SetDumpMode(stdout, ox, TRUE);  makes the fast DP algorithms dump their matrices
     p7_gmx_Dump(stdout, gx, p7_DEFAULT);   dumps a generic DP matrix
     p7_oprofile_SameMSV(om, gm);
  */
  /* NOTE(review): dump mode is switched on unconditionally here, although the
   * comment above lists this call as a debugging aid -- confirm it is intended. */
  p7_omx_SetDumpMode(stdout, ox, TRUE);

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {
      /* Reconfigure all three models and grow both matrices for this target's length */
      p7_oprofile_ReconfigLength(om, sq->n);
      p7_ReconfigLength(gm, sq->n);
      p7_bg_SetLength(bg, sq->n);
      p7_omx_GrowTo(ox, om->M, 0, sq->n);
      p7_gmx_GrowTo(gx, gm->M, sq->n);

      /* Optimized filter score */
      p7_MSVFilter(sq->dsq, sq->n, om, ox, &msvraw);
      p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc);
      msvscore = (msvraw - nullsc) / eslCONST_LOG2;
      P        = esl_gumbel_surv(msvscore, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]);

      /* Generic implementation, for comparison */
      p7_GMSV(sq->dsq, sq->n, gm, gx, 2.0, &graw);
      gscore = (graw - nullsc) / eslCONST_LOG2;
      gP     = esl_gumbel_surv(gscore, gm->evparam[p7_MMU], gm->evparam[p7_MLAMBDA]);

      if (esl_opt_GetBoolean(go, "-1"))
        {
          printf("%-30s %-20s %9.2g %7.2f %9.2g %7.2f\n", sq->name, hmm->name, P, msvscore, gP, gscore);
        }
      else if (esl_opt_GetBoolean(go, "-P"))
        { /* output suitable for direct use in profmark benchmark postprocessors: */
          printf("%g %.2f %s %s\n", P, msvscore, sq->name, hmm->name);
        }
      else
        {
          printf("target sequence: %s\n", sq->name);
          printf("msv filter raw score: %.2f nats\n", msvraw);
          printf("null score: %.2f nats\n", nullsc);
          printf("per-seq score: %.2f bits\n", msvscore);
          printf("P-value: %g\n", P);
          printf("GMSV raw score: %.2f nats\n", graw);
          printf("GSMV per-seq score: %.2f bits\n", gscore);
          printf("GSMV P-value: %g\n", gP);
        }

      esl_sq_Reuse(sq);
    }
  /* Anything other than end-of-file means a read stopped early; report it
   * instead of silently presenting truncated output as complete. */
  if (status != eslEOF) p7_Fail("Sequence file read failed, code %d.", status);

  /* cleanup */
  esl_sq_Destroy(sq);
  esl_sqfile_Close(sqfp);
  p7_omx_Destroy(ox);
  p7_gmx_Destroy(gx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_hmmfile_Close(hfp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); char *qfile = esl_opt_GetArg(go, 1); char *tfile = esl_opt_GetArg(go, 2); ESL_SQFILE *qfp = NULL; ESL_SQFILE *tfp = NULL; ESL_SQ *qsq = esl_sq_CreateDigital(abc); ESL_SQ *tsq = esl_sq_CreateDigital(abc); ESL_SCOREMATRIX *S = esl_scorematrix_Create(abc); ESL_DMATRIX *Q = NULL; P7_BG *bg = p7_bg_Create(abc); P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_REFMX *vit = p7_refmx_Create(200, 400); /* will grow as needed */ double *fa = malloc(sizeof(double) * abc->K); double popen = 0.02; double pextend = 0.4; double lambda; float vsc; float nullsc; int status; esl_composition_BL62(fa); esl_vec_D2F(fa, abc->K, bg->f); esl_scorematrix_Set("BLOSUM62", S); esl_scorematrix_ProbifyGivenBG(S, fa, fa, &lambda, &Q); esl_scorematrix_JointToConditionalOnQuery(abc, Q); if (esl_sqfile_OpenDigital(abc, qfile, eslSQFILE_UNKNOWN, NULL, &qfp) != eslOK) esl_fatal("failed to open %s", qfile); if (esl_sqio_Read(qfp, qsq) != eslOK) esl_fatal("failed to read query seq"); p7_Seqmodel(abc, qsq->dsq, qsq->n, qsq->name, Q, bg->f, popen, pextend, &hmm); p7_hmm_SetComposition(hmm); p7_hmm_SetConsensus(hmm, qsq); gm = p7_profile_Create(hmm->M, abc); p7_profile_ConfigUnilocal(gm, hmm, bg, 400); if (esl_sqfile_OpenDigital(abc, tfile, eslSQFILE_UNKNOWN, NULL, &tfp) != eslOK) esl_fatal("failed to open %s", tfile); while ((status = esl_sqio_Read(tfp, tsq)) == eslOK) { p7_bg_SetLength (bg, tsq->n); p7_profile_SetLength(gm, tsq->n); p7_ReferenceViterbi(tsq->dsq, tsq->n, gm, vit, NULL, &vsc); p7_bg_NullOne(bg, tsq->dsq, tsq->n, &nullsc); printf("%.4f %-25s %-25s\n", (vsc - nullsc) / eslCONST_LOG2, tsq->name, gm->name); esl_sq_Reuse(tsq); p7_refmx_Reuse(vit); } p7_refmx_Destroy(vit); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); p7_bg_Destroy(bg); esl_dmatrix_Destroy(Q); esl_scorematrix_Destroy(S); free(fa); esl_sq_Destroy(qsq); esl_sq_Destroy(tsq); 
esl_sqfile_Close(qfp); esl_sqfile_Close(tfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go; char *msafile; ESL_MSAFILE *afp; ESL_MSA *msa; float *sqd; int status; int nbad; int nali = 0; int nbadali = 0; int nwgt = 0; int nbadwgt = 0; int i; int be_quiet; int do_gsc; int do_pb; int do_blosum; double maxid; double tol; int maxN; /* Process command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } be_quiet = esl_opt_GetBoolean(go, "-q"); do_blosum = esl_opt_GetBoolean(go, "--blosum"); do_gsc = esl_opt_GetBoolean(go, "--gsc"); do_pb = esl_opt_GetBoolean(go, "--pb"); maxid = esl_opt_GetReal (go, "--id"); tol = esl_opt_GetReal (go, "--tol"); maxN = esl_opt_GetInteger(go, "--maxN"); if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } msafile = esl_opt_GetArg(go, 1); esl_getopts_Destroy(go); /* Weight one or more alignments from input file */ esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp); while (esl_msa_Read(afp, &msa) == eslOK) { if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; } nali++; nwgt += msa->nseq; ESL_ALLOC(sqd, sizeof(float) * msa->nseq); if (do_gsc) { esl_msaweight_GSC(msa); GSCWeights(msa->aseq, msa->nseq, msa->alen, sqd); } else if (do_pb) { esl_msaweight_PB(msa); PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, sqd); } else if (do_blosum) { esl_msaweight_BLOSUM(msa, maxid); BlosumWeights(msa->aseq, msa->nseq, msa->alen, maxid, sqd); /* workaround SQUID bug: BLOSUM weights weren't renormalized to sum to nseq. */ esl_vec_FNorm (sqd, msa->nseq); esl_vec_FScale(sqd, msa->nseq, (float) msa->nseq); } if (! 
be_quiet) { for (i = 0; i < msa->nseq; i++) fprintf(stdout, "%-20s %.3f %.3f\n", msa->sqname[i], msa->wgt[i], sqd[i]); } nbad = 0; for (i = 0; i < msa->nseq; i++) if (esl_DCompare((double) sqd[i], msa->wgt[i], tol) != eslOK) nbad++; if (nbad > 0) nbadali++; nbadwgt += nbad; if (nbad > 0) printf("%-20s :: alignment shows %d weights that differ (out of %d) \n", msa->name, nbad, msa->nseq); esl_msa_Destroy(msa); free(sqd); } esl_msafile_Close(afp); if (nbadali == 0) printf("OK: all weights identical between squid and Easel in %d alignment(s)\n", nali); else { printf("%d of %d weights mismatched at (> %f fractional difference)\n", nbadwgt, nwgt, tol); printf("involving %d of %d total alignments\n", nbadali, nali); } return eslOK; ERROR: return status; }