/* output_filter_power()
 *
 * Reports how many of the simulated scores pass a filter's P-value
 * threshold, so that the filter's actual power can be compared with
 * its nominal threshold (for instance, a threshold of 0.02 should
 * let ~2% of sequences through).
 *
 * P-values are computed from the HMM's own calibration parameters:
 * a Gumbel survival function for --vit and --msv scores, and an
 * exponential survival function for --fwd scores.
 *
 * Appends one tab-delimited line to <cfg->ffp>, suitable for building
 * histograms over many HMMs:
 *    <hmm name>  <# of seqs passing threshold>  <fraction of seqs passing threshold>
 *
 * Args:     go      - application options (--pthresh, --vit, --msv, --fwd)
 *           cfg     - configuration; <cfg->N> scores, <cfg->ffp> output stream
 *           errbuf  - receives the message on failure (via ESL_FAIL)
 *           hmm     - model providing name and evparam[] calibration
 *           scores  - array of <cfg->N> scores
 *
 * Returns:  <eslOK> on success; <eslEINVAL> if none of --vit, --msv,
 *           --fwd is set.
 *
 * SRE, Thu Apr 9 08:57:32 2009 [Janelia] xref J4/133
 */
static int
output_filter_power(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores)
{
  double threshold = esl_opt_GetReal(go, "--pthresh");  /* P-value cutoff for the filter score    */
  double mu, lambda;                                     /* location/scale of the survival function */
  int    use_gumbel;                                     /* TRUE: Gumbel; FALSE: exponential        */
  int    passed = 0;                                     /* number of scores with P <= threshold    */
  int    idx;

  /* Choose the calibration parameters that match the score type. */
  if (esl_opt_GetBoolean(go, "--vit"))
    {
      mu         = hmm->evparam[p7_VMU];
      lambda     = hmm->evparam[p7_VLAMBDA];
      use_gumbel = TRUE;
    }
  else if (esl_opt_GetBoolean(go, "--msv"))
    {
      mu         = hmm->evparam[p7_MMU];
      lambda     = hmm->evparam[p7_MLAMBDA];
      use_gumbel = TRUE;
    }
  else if (esl_opt_GetBoolean(go, "--fwd"))
    {
      mu         = hmm->evparam[p7_FTAU];
      lambda     = hmm->evparam[p7_FLAMBDA];
      use_gumbel = FALSE;
    }
  else ESL_FAIL(eslEINVAL, errbuf, "can only use --ffile with viterbi, msv, or fwd scores");

  /* Count the scores whose P-value is at or below the threshold. */
  for (idx = 0; idx < cfg->N; idx++)
    {
      double pval;

      if (use_gumbel) pval = esl_gumbel_surv(scores[idx], mu, lambda);
      else            pval = esl_exp_surv   (scores[idx], mu, lambda);

      if (pval <= threshold) passed++;
    }

  fprintf(cfg->ffp, "%s\t%d\t%.4f\n", hmm->name, passed, (double) passed / (double) cfg->N);
  return eslOK;
}
/* utest_logf(): check esl_sse_logf() on special input values.
 *
 * Expected results enforced below:
 *    logf(-inf) = NaN    logf(-1) = NaN    logf(-0) = NaN
 *    logf(0)    = -inf   logf(inf) = inf   logf(NaN) = NaN
 *
 * With -v, the input and output vectors are dumped to stdout.
 * Any violated expectation is fatal.
 */
static void
utest_logf(ESL_GETOPTS *go)
{
  int    verbose = esl_opt_GetBoolean(go, "-v");
  __m128 in;                                  /* test input  */
  union { __m128 v; float x[4]; } out;        /* test output */

  /* First vector. _mm_set_ps() takes its arguments in lane order 3 2 1 0,
   * so lane 0 holds -inf, lane 1 holds -1, lane 2 holds -0, lane 3 holds 0.
   */
  in    = _mm_set_ps(0.0, -0.0, -1.0, -eslINFINITY);
  out.v = esl_sse_logf(in);
  if (verbose)
    {
      printf("logf");
      esl_sse_dump_ps(stdout, in);
      printf(" ==> ");
      esl_sse_dump_ps(stdout, out.v);
      printf("\n");
    }
  if (! isnan(out.x[0]))                      esl_fatal("logf(-inf) should be NaN");
  if (! isnan(out.x[1]))                      esl_fatal("logf(-1) should be NaN");
  if (! isnan(out.x[2]))                      esl_fatal("logf(-0) should be NaN");
  if (! (out.x[3] < 0 && isinf(out.x[3])))    esl_fatal("logf(0) should be -inf");

  /* Second vector: lane 0 holds inf, lane 1 holds NaN; only those two
   * lanes are checked (lanes 2 and 3 hold FLT_MIN and FLT_MAX).
   */
  in    = _mm_set_ps(FLT_MAX, FLT_MIN, eslNaN, eslINFINITY);
  out.v = esl_sse_logf(in);
  if (verbose)
    {
      printf("logf");
      esl_sse_dump_ps(stdout, in);
      printf(" ==> ");
      esl_sse_dump_ps(stdout, out.v);
      printf("\n");
    }
  if (! isinf(out.x[0])) esl_fatal("logf(inf) should be inf");
  if (! isnan(out.x[1])) esl_fatal("logf(NaN) should be NaN");
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; ESL_RANDOMNESS *r = NULL; int be_verbose; go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { esl_usage(stdout, argv[0], usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ exit(0); } if (esl_opt_ArgNumber(go) != 0) { printf("Incorrect number of command line arguments.\n"); esl_usage(stdout, argv[0], usage); exit(1); } be_verbose = esl_opt_GetBoolean(go, "-v"); if (esl_opt_GetBoolean(go, "-r")) { r = esl_randomness_CreateTimeseeded(); if (be_verbose) printf("seed = %ld\n", esl_randomness_GetSeed(r)); } else r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
/* utest_basic()
 *
 * Minimal driver: builds a small DNA profile from a two-sequence
 * Stockholm alignment of GAATTC, digitizes the target GAATTC, then
 * runs Viterbi (plus traceback) and Forward on it. Handy for dumping
 * DP matrices, traces and scores when debugging (use -v).
 *
 * The only numerical check is that the score of the Viterbi trace
 * agrees with the Viterbi score to a tolerance of 1e-5; any failure
 * is fatal.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char         *query   = "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  char         *targ    = "GAATTC";
  int           fmt     = eslMSAFILE_STOCKHOLM;
  int           L       = strlen(targ);
  int           verbose = esl_opt_GetBoolean(go, "-v");
  ESL_ALPHABET *abc     = NULL;
  P7_PRIOR     *pri     = NULL;
  ESL_MSA      *msa     = NULL;
  P7_HMM       *hmm     = NULL;
  P7_BG        *bg      = NULL;
  P7_PROFILE   *gm      = NULL;
  ESL_DSQ      *dsq     = NULL;
  P7_GMX       *gx      = NULL;
  P7_TRACE     *tr      = NULL;
  float         vsc, vsc2, fsc;

  /* Alphabet, prior, and the digitized query alignment. */
  abc = esl_alphabet_Create(eslDNA);
  if (abc == NULL) esl_fatal("failed to create alphabet");

  pri = p7_prior_CreateNucleic();
  if (pri == NULL) esl_fatal("failed to create prior");

  msa = esl_msa_CreateFromString(query, fmt);
  if (msa == NULL) esl_fatal("failed to create MSA");

  if (esl_msa_Digitize(abc, msa, NULL) != eslOK)
    esl_fatal("failed to digitize MSA");

  /* Model construction and parameterization. */
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK)
    esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, pri) != eslOK)
    esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL) != eslOK)
    esl_fatal("failed to make consensus");

  /* Null model and search profile, configured for the target length. */
  bg = p7_bg_Create(abc);
  if (bg == NULL) esl_fatal("failed to create DNA null model");

  gm = p7_profile_Create(hmm->M, abc);
  if (gm == NULL) esl_fatal("failed to create GAATTC profile");

  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL) != eslOK)
    esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK)
    esl_fatal("whoops, profile is bad!");

  /* Target sequence, DP matrix, and trace. */
  if (esl_abc_CreateDsq(abc, targ, &dsq) != eslOK)
    esl_fatal("failed to create GAATTC digital sequence");

  gx = p7_gmx_Create(gm->M, L);
  if (gx == NULL) esl_fatal("failed to create DP matrix");

  tr = p7_trace_Create();
  if (tr == NULL) esl_fatal("trace creation failed");

  /* Viterbi, then its traceback; the trace must rescore to the same value. */
  p7_GViterbi(dsq, L, gm, gx, &vsc);
  if (verbose)
    {
      printf("Viterbi score: %.4f\n", vsc);
      p7_gmx_Dump(stdout, gx, p7_DEFAULT);
    }

  p7_GTrace(dsq, L, gm, gx, tr);
  p7_trace_Score(tr, dsq, gm, &vsc2);
  if (verbose) p7_trace_Dump(stdout, tr, gm, dsq);

  if (esl_FCompare(vsc, vsc2, 1e-5) != eslOK)
    esl_fatal("trace score and Viterbi score don't agree.");

  /* Forward reuses the same DP matrix. */
  p7_GForward(dsq, L, gm, gx, &fsc);
  if (verbose)
    {
      printf("Forward score: %.4f\n", fsc);
      p7_gmx_Dump(stdout, gx, p7_DEFAULT);
    }

  /* Release everything. */
  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(pri);
  esl_alphabet_Destroy(abc);
  return;
}
/* msa_shuffling()
 * SRE, Tue Jan 22 08:39:51 2008 [Market Street Cafe, Leesburg]
 *
 * Shuffling multiple sequence alignments.
 *
 * Reads every alignment in the file named by command line argument 1
 * and, for each, writes <-N> randomized versions to <ofp> in format
 * <outfmt>. With --boot the randomization is done by
 * esl_msashuffle_Bootstrap(); otherwise by esl_msashuffle_Shuffle().
 *
 * Output alignments are named "<name>-sample[-i]" or
 * "<name>-shuffle[-i]" (or just "sample[-i]" / "shuffle[-i]" when the
 * input alignment has no name); the "-i" suffix is only used when
 * N > 1.
 *
 * Returns <eslOK>. Open, read and parse failures are fatal.
 */
static int
msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char        *msafile = esl_opt_GetArg(go, 1);
  int          infmt   = eslMSAFILE_UNKNOWN;
  ESL_MSAFILE *afp     = NULL;
  ESL_MSA     *msa     = NULL;
  ESL_MSA     *shuf    = NULL;
  int          N       = esl_opt_GetInteger(go, "-N");
  int          boot    = esl_opt_GetBoolean(go, "--boot");
  int          i;
  int          status, mstatus;

  status = esl_msafile_Open(msafile, infmt, NULL, &afp);
  if      (status == eslENOTFOUND) esl_fatal("Alignment file %s isn't readable\n", msafile);
  else if (status == eslEFORMAT)   esl_fatal("Couldn't determine format of %s\n",  msafile);
  else if (status != eslOK)        esl_fatal("Alignment file open failed (error %d)\n", status);

  while ((mstatus = esl_msa_Read(afp, &msa)) != eslEOF)
    {
      /* Test the READ status (mstatus), not the stale open status:
       * otherwise a failed read falls through and <msa> is used anyway.
       */
      if      (mstatus == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf);
      else if (mstatus == eslEINVAL)  esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf);
      else if (mstatus != eslOK)      esl_fatal("Alignment file read failed with error code %d\n", mstatus);

      shuf = esl_msa_Clone(msa);

      for (i = 0; i < N; i++)
        {
          if (boot) esl_msashuffle_Bootstrap(r, msa, shuf);
          else      esl_msashuffle_Shuffle  (r, msa, shuf);

          /* Set the name of the shuffled alignment */
          if (msa->name != NULL)
            {
              if (boot)
                {
                  if (N > 1) esl_msa_FormatName(shuf, "%s-sample-%d", msa->name, i);
                  else       esl_msa_FormatName(shuf, "%s-sample",    msa->name);
                }
              else
                {
                  if (N > 1) esl_msa_FormatName(shuf, "%s-shuffle-%d", msa->name, i);
                  else       esl_msa_FormatName(shuf, "%s-shuffle",    msa->name);
                }
            }
          else
            {
              if (boot)
                {
                  if (N > 1) esl_msa_FormatName(shuf, "sample-%d", i);
                  else       esl_msa_FormatName(shuf, "sample");
                }
              else
                {
                  if (N > 1) esl_msa_FormatName(shuf, "shuffle-%d", i);
                  else       esl_msa_FormatName(shuf, "shuffle");
                }
            }

          esl_msa_Write(ofp, shuf, outfmt);
        }

      esl_msa_Destroy(shuf);
      esl_msa_Destroy(msa);
    }

  /* NOTE(review): <afp> is never closed in this function - confirm
   * whether the caller relies on process exit to release it.
   */
  return eslOK;
}
/* utest_odds(): accuracy of esl_sse_logf() and esl_sse_expf() on odds
 * ratios, the main intended use.
 *
 * For each of <-N> trials, two positive uniform deviates are drawn and
 * their ratio is pushed through the vector logf, then back through the
 * vector expf. Each result is compared with the scalar log()/exp()
 * using the symmetric relative error 2|a-b| / |a+b| (taken as 0 when
 * both values are exactly 0).
 *
 * Fatal if the average relative error exceeds 1e-8 or the maximum
 * exceeds 1e-6, for either function. -v prints a summary; --vv prints
 * one line per trial.
 */
static void
utest_odds(ESL_GETOPTS *go, ESL_RANDOMNESS *r)
{
  int    N            = esl_opt_GetInteger(go, "-N");
  int    verbose      = esl_opt_GetBoolean(go, "-v");
  int    very_verbose = esl_opt_GetBoolean(go, "--vv");
  union { __m128 v; float x[4]; } vlog;                    /* vector logf() result */
  union { __m128 v; float x[4]; } vexp;                    /* vector expf() result */
  float  num, den, odds;
  float  ref_log, ref_exp;                                 /* scalar references    */
  double log_err, log_maxerr = 0.0, log_avgerr = 0.0;      /* errors on logf()     */
  double exp_err, exp_maxerr = 0.0, exp_avgerr = 0.0;      /* errors on expf()     */
  int    trial;

  for (trial = 0; trial < N; trial++)
    {
      num  = esl_rnd_UniformPositive(r);
      den  = esl_rnd_UniformPositive(r);
      odds = num / den;
      if (odds == 0.0) esl_fatal("whoa, odds ratio can't be 0!\n");

      /* log of the odds ratio: all four lanes hold the same value, lane 0 is examined */
      vlog.v  = esl_sse_logf(_mm_set1_ps(odds));
      ref_log = log(odds);
      if (vlog.x[0] == 0. && ref_log == 0.)
        log_err = 0.0;
      else
        log_err = 2 * fabs(vlog.x[0] - ref_log) / fabs(vlog.x[0] + ref_log);
      if (log_err > log_maxerr) log_maxerr = log_err;
      log_avgerr += log_err / (float) N;
      if (isnan(log_avgerr)) esl_fatal("whoa, what?\n");

      /* and back to odds, exponentiating the vector log result */
      vexp.v  = esl_sse_expf(vlog.v);
      ref_exp = exp(vlog.x[0]);
      if (vexp.x[0] == 0. && ref_exp == 0.)
        exp_err = 0.0;
      else
        exp_err = 2 * fabs(vexp.x[0] - ref_exp) / fabs(vexp.x[0] + ref_exp);
      if (exp_err > exp_maxerr) exp_maxerr = exp_err;
      exp_avgerr += exp_err / (float) N;

      if (very_verbose)
        printf("%13.7g %13.7g %13.7g %13.7g %13.7g %13.7g %13.7g\n",
               odds, ref_log, vlog.x[0], ref_exp, vexp.x[0], log_err, exp_err);
    }

  if (verbose)
    {
      printf("Average [max] logf() relative error in %d odds trials: %13.8g [%13.8g]\n", N, log_avgerr, log_maxerr);
      printf("Average [max] expf() relative error in %d odds trials: %13.8g [%13.8g]\n", N, exp_avgerr, exp_maxerr);
      printf("(random seed : %" PRIu32 ")\n", esl_randomness_GetSeed(r));
    }

  if (log_avgerr > 1e-8) esl_fatal("average error on logf() is intolerable\n");
  if (log_maxerr > 1e-6) esl_fatal("maximum error on logf() is intolerable\n");
  if (exp_avgerr > 1e-8) esl_fatal("average error on expf() is intolerable\n");
  if (exp_maxerr > 1e-6) esl_fatal("maximum error on expf() is intolerable\n");
}
/* utest_expf(): check esl_sse_expf() on special values and around its
 * lower cutoff.
 *
 * With -v, every input and output vector is dumped to stdout. Any
 * violated expectation is fatal.
 */
static void
utest_expf(ESL_GETOPTS *go)
{
  int    verbose = esl_opt_GetBoolean(go, "-v");
  __m128 x;                                 /* test input  */
  union { __m128 v; float x[4]; } r;        /* test output */

  /* exp(-inf) = 0    exp(-0) = 1    exp(0) = 1    exp(inf) = inf
   * _mm_set_ps() takes its arguments in lane order 3 2 1 0, so
   * lane 0 = -inf, lane 1 = -0, lane 2 = 0, lane 3 = inf.
   */
  x   = _mm_set_ps(eslINFINITY, 0.0, -0.0, -eslINFINITY);
  r.v = esl_sse_expf(x);
  if (verbose)
    {
      printf("expf");
      esl_sse_dump_ps(stdout, x);
      printf(" ==> ");
      esl_sse_dump_ps(stdout, r.v);
      printf("\n");
    }
  if (r.x[0] != 0.0f)    esl_fatal("expf(-inf) should be 0");
  if (r.x[1] != 1.0f)    esl_fatal("expf(-0) should be 1");
  if (r.x[2] != 1.0f)    esl_fatal("expf(0) should be 1");
  if (! isinf(r.x[3]))   esl_fatal("expf(inf) should be inf");

  /* exp(NaN) = NaN    exp(large) = inf    exp(-large) = 0
   * lane 0 = NaN, lane 1 = 666, lane 2 = -666; lane 3 (= 1) is not checked.
   */
  x   = _mm_set_ps(1.0f, -666.0f, 666.0f, eslNaN);
  r.v = esl_sse_expf(x);
  if (verbose)
    {
      printf("expf");
      esl_sse_dump_ps(stdout, x);
      printf(" ==> ");
      esl_sse_dump_ps(stdout, r.v);
      printf("\n");
    }
  if (! isnan(r.x[0]))   esl_fatal("expf(NaN) should be NaN");
  if (! isinf(r.x[1]))   esl_fatal("expf(large x) should be inf");
  if (r.x[2] != 0.0f)    esl_fatal("expf(-large x) should be 0");

  /* Make sure we are correct around the problematic ~minlogf boundary:
   *   - e^x for x < -127.5 log2 + epsilon is 0, because that's our minlogf barrier;
   *   - e^x for -127.5 log2 < x < -126.5 log2 is 0 too, but is actually calculated;
   *   - e^x just above -126.5 log2 is below FLT_MIN (subnormal).
   *
   * minlogf = -127.5 log(2) + epsilon = -88.3762626647949;
   *     and   -126.5 log(2)           = -87.68311834
   *
   * The four lanes tested, and what is enforced for each:
   *   lane 0: expf(-87.6831) => < FLT_MIN (subnormal, ~8.31e-39; may become 0
   *                             in flush-to-zero mode for subnormals)
   *   lane 1: expf(-87.6832) => 0
   *   lane 2: expf(-88.3762) => 0
   *   lane 3: expf(-88.3763) => 0
   */
  x   = _mm_set_ps(-88.3763, -88.3762, -87.6832, -87.6831);
  r.v = esl_sse_expf(x);
  if (verbose)
    {
      printf("expf");
      esl_sse_dump_ps(stdout, x);
      printf(" ==> ");
      esl_sse_dump_ps(stdout, r.v);
      printf("\n");
    }
  if (r.x[0] >= FLT_MIN) esl_fatal("expf( -126.5 log2 + eps) should be around FLT_MIN");
  if (r.x[1] != 0.0f)    esl_fatal("expf( -126.5 log2 - eps) should be 0.0 (by calculation)");
  if (r.x[2] != 0.0f)    esl_fatal("expf( -127.5 log2 + eps) should be 0.0 (by calculation)");
  /* Report the lane that actually failed (lane 3). */
  if (r.x[3] != 0.0f)    esl_fatal("expf( -127.5 log2 - eps) should be 0.0 (by min bound): %g", r.x[3]);
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* application configuration */ ESL_RANDOMNESS *r = NULL; /* random number generator */ FILE *ofp = NULL; /* data output stream */ int outfmt = eslSQFILE_FASTA; /* Parse command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) cmdline_failure(argv[0], "Error in app configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") ) cmdline_help(argv[0], go); /* Open the output data file, if any */ if (esl_opt_GetString(go, "-o") != NULL) { if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL) esl_fatal("Failed to open output file %s\n", esl_opt_GetString(go, "-o")); } else ofp = stdout; /* Initialize */ r = esl_randomness_Create(esl_opt_GetInteger(go, "--seed")); /* Hand off execution to one of the three modes */ if (esl_opt_GetBoolean(go, "-A")) /* Alignment shuffling */ { if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); msa_shuffling(go, r, ofp, outfmt); } else if (esl_opt_GetBoolean(go, "-G")) /* Sequence generation */ { if (esl_opt_ArgNumber(go) != 0) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); seq_generation(go, r, ofp, outfmt); } else if (esl_opt_GetBoolean(go, "-S")) /* Sequence shuffling */ { if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); seq_shuffling(go, r, ofp, outfmt); } if (esl_opt_GetString(go, "-o") != NULL) fclose(ofp); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* mpi_worker()
 *
 * Main control for an MPI worker process.
 *
 * Repeatedly receives an HMM from rank 0, optionally recalibrates it
 * (--recal), runs process_workunit() on it, and sends back one packed
 * message: the integer status, the <cfg->N> scores, and (with -a) the
 * <cfg->N> alignment lengths. The loop ends when p7_hmm_mpi_Recv()
 * returns anything other than <eslOK>.
 *
 * If recalibration or the work unit fails, the worker instead sends the
 * failing status followed by the error buffer, cleans up, and returns.
 * An allocation failure is fatal (p7_Fail).
 */
static void
mpi_worker(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  char     errbuf[eslERRBUFSIZE];
  P7_HMM  *hmm        = NULL;
  char    *wbuf       = NULL;    /* packing buffer                                 */
  double  *xv         = NULL;    /* result: array of N scores                      */
  int     *av         = NULL;    /* optional result: array of N alignment lengths  */
  int      wn         = 0;       /* size of <wbuf> in bytes                        */
  int      xstatus    = eslOK;
  int      status;               /* name required by ESL_ALLOC()                   */
  int      pos;                  /* running position in <wbuf> while packing       */
  int      do_alilens;

  /* Worker initializes */
  status = minimum_mpi_working_buffer(go, cfg->N, &wn);
  if (status != eslOK) xstatus = status;

  do_alilens = esl_opt_GetBoolean(go, "-a");

  ESL_ALLOC(wbuf, wn * sizeof(char));
  ESL_ALLOC(xv,   cfg->N * sizeof(double) + 2);
  if (do_alilens)
    ESL_ALLOC(av, cfg->N * sizeof(int));

  /* Main worker loop */
  while (p7_hmm_mpi_Recv(0, 0, MPI_COMM_WORLD, &wbuf, &wn, &(cfg->abc), &hmm) == eslOK)
    {
      if (esl_opt_GetBoolean(go, "--recal") &&
          (status = recalibrate_model(go, cfg, errbuf, hmm)) != eslOK)
        goto CLEANERROR;

      status = process_workunit(go, cfg, errbuf, hmm, xv, av);
      if (status != eslOK) goto CLEANERROR;

      /* Success message: status, scores, then optional alignment lengths. */
      pos = 0;
      MPI_Pack(&status, 1,      MPI_INT,    wbuf, wn, &pos, MPI_COMM_WORLD);
      MPI_Pack(xv,      cfg->N, MPI_DOUBLE, wbuf, wn, &pos, MPI_COMM_WORLD);
      if (do_alilens)
        MPI_Pack(av,    cfg->N, MPI_INT,    wbuf, wn, &pos, MPI_COMM_WORLD);
      MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);

      p7_hmm_Destroy(hmm);
    }

  free(wbuf);
  free(xv);
  if (av != NULL) free(av);
  return;

 CLEANERROR:
  /* Failure message: status, then the error text. */
  pos = 0;
  MPI_Pack(&status, 1,             MPI_INT,  wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Pack(errbuf,  eslERRBUFSIZE, MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);

  if (wbuf != NULL) free(wbuf);
  if (hmm  != NULL) p7_hmm_Destroy(hmm);
  if (xv   != NULL) free(xv);
  if (av   != NULL) free(av);
  return;

 ERROR:
  p7_Fail("Allocation error in mpi_worker");
}
/* msa_shuffling()
 *
 * Shuffling multiple sequence alignments.
 *
 * Opens the alignment file named by command line argument 1 (format
 * autodetected), and for every alignment in it writes <-N> randomized
 * versions to <ofp>, in the same format the input file was read in.
 * With --boot, esl_msashuffle_Bootstrap() is used; otherwise
 * esl_msashuffle_Shuffle().
 *
 * Each output alignment is named "<name>-sample[-i]" or
 * "<name>-shuffle[-i]", dropping the "<name>-" prefix when the input
 * has no name, and dropping the "-i" suffix unless N > 1.
 *
 * Returns <eslOK>. Open and read failures are handed to
 * esl_msafile_OpenFailure() / esl_msafile_ReadFailure().
 */
static int
msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char        *msafile   = esl_opt_GetArg(go, 1);
  int          infmt     = eslMSAFILE_UNKNOWN;
  int          nsamples  = esl_opt_GetInteger(go, "-N");
  ESL_MSAFILE *afp       = NULL;
  ESL_MSA     *msa       = NULL;
  ESL_MSA     *shuf      = NULL;
  int          which;
  int          status;

  status = esl_msafile_Open(NULL, msafile, NULL, infmt, NULL, &afp);
  if (status != eslOK) esl_msafile_OpenFailure(afp, status);

  for (;;)
    {
      status = esl_msafile_Read(afp, &msa);
      if (status == eslEOF) break;
      if (status != eslOK)  esl_msafile_ReadFailure(afp, status);

      shuf = esl_msa_Clone(msa);

      for (which = 0; which < nsamples; which++)
        {
          int boot = esl_opt_GetBoolean(go, "--boot");

          if (boot) esl_msashuffle_Bootstrap(r, msa, shuf);
          else      esl_msashuffle_Shuffle  (r, msa, shuf);

          /* Name the randomized alignment after its source, if it has a name. */
          if (msa->name != NULL && boot)
            {
              if (nsamples > 1) esl_msa_FormatName(shuf, "%s-sample-%d", msa->name, which);
              else              esl_msa_FormatName(shuf, "%s-sample", msa->name);
            }
          else if (msa->name != NULL)
            {
              if (nsamples > 1) esl_msa_FormatName(shuf, "%s-shuffle-%d", msa->name, which);
              else              esl_msa_FormatName(shuf, "%s-shuffle", msa->name);
            }
          else if (boot)
            {
              if (nsamples > 1) esl_msa_FormatName(shuf, "sample-%d", which);
              else              esl_msa_FormatName(shuf, "sample");
            }
          else
            {
              if (nsamples > 1) esl_msa_FormatName(shuf, "shuffle-%d", which);
              else              esl_msa_FormatName(shuf, "shuffle");
            }

          /* Output uses the input file's alignment format, not <outfmt>. */
          esl_msafile_Write(ofp, shuf, afp->format);
        }

      esl_msa_Destroy(shuf);
      esl_msa_Destroy(msa);
    }

  esl_msafile_Close(afp);
  return eslOK;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; int infmt = eslMSAFILE_UNKNOWN; ESLX_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; FILE *ofp = stdout; int nali = 0; int namewidth; double pid; int nid, n; int i,j; int status; /* allow user to assert the input MSA alphabet */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* allow user to assert the input MSA format */ if (esl_opt_IsOn(go, "--informat") && (infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid MSA file format for --informat", esl_opt_GetString(go, "--informat")); /* digital open */ if ( ( status = eslx_msafile_Open(&abc, msafile, NULL, infmt, NULL, &afp)) != eslOK) eslx_msafile_OpenFailure(afp, status); while ((status = eslx_msafile_Read(afp, &msa)) == eslOK) { nali++; namewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq); for (i = 0; i < msa->nseq; i++) for (j = i+1; j < msa->nseq; j++) { esl_dst_XPairId(abc, msa->ax[i], msa->ax[j], &pid, &nid, &n); fprintf(ofp, "%-*s %-*s %6.2f %6d %6d\n", namewidth, msa->sqname[i], namewidth, msa->sqname[j], pid*100.0, nid, n); } esl_msa_Destroy(msa); } if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); eslx_msafile_Close(afp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(0); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESLX_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; int textmode = esl_opt_GetBoolean(go, "--text"); int nali = 0; int status; /* If you know the alphabet you want, create it - you'll pass it to eslx_msafile_Open() */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* Open in text or digital mode. * To let the Open() function autoguess the format, you pass <infmt=eslMSAFILE_UNKNOWN>. * To let it autoguess the alphabet, you set <abc=NULL> and pass <&abc>. * To open in text mode instead of digital, you pass <NULL> for the alphabet argument. * eslx_msafile_OpenFailure() is a convenience, printing various diagnostics of any * open failure to <stderr>. You can of course handle your own diagnostics instead. */ if (textmode) status = eslx_msafile_Open(NULL, msafile, NULL, fmt, NULL, &afp); else status = eslx_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp); if (status != eslOK) eslx_msafile_OpenFailure(afp, status); fmt = afp->format; while ((status = eslx_msafile_Read(afp, &msa)) == eslOK) { /* if digital MSA: msa->ax[idx=0..nseq-1][acol=1..alen] is the alignment data; * if text MSA: msa->aseq[idx=0..nseq-1][acol=0..alen-1] */ nali++; /* permute it */ esl_msashuffle_PermuteSequenceOrder(rng, msa); eslx_msafile_Write(stdout, msa, fmt); esl_msa_Destroy(msa); } if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); /* a convenience, like eslx_msafile_OpenFailure() */ esl_alphabet_Destroy(abc); eslx_msafile_Close(afp); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); exit(0); }
/* emit_sequences()
 *
 * Samples <-N> sequences from <hmm> and writes them to <ofp> in format
 * <outfmt>, named "<hmm name>-sample<k>" for k = 1..N.
 *
 * With -p, sequences are emitted from a profile configured for length
 * <-L> in the mode selected by --local (the default), --unilocal,
 * --glocal or --uniglocal, using p7_ProfileEmit(); otherwise they are
 * emitted from the core model with p7_CoreEmit().
 *
 * Any failure is fatal.
 */
static void
emit_sequences(ESL_GETOPTS *go, FILE *ofp, int outfmt, ESL_RANDOMNESS *r, P7_HMM *hmm)
{
  int         from_profile = esl_opt_GetBoolean(go, "-p");
  int         nsample      = esl_opt_GetInteger(go, "-N");
  int         len          = esl_opt_GetInteger(go, "-L");
  int         mode         = p7_LOCAL;
  ESL_SQ     *sq           = NULL;
  P7_TRACE   *tr           = NULL;
  P7_BG      *bg           = NULL;
  P7_PROFILE *gm           = NULL;
  int         k;
  int         status;

  /* Alignment mode of the profile; the first option that is set wins. */
  if      (esl_opt_GetBoolean(go, "--local"))     mode = p7_LOCAL;
  else if (esl_opt_GetBoolean(go, "--unilocal"))  mode = p7_UNILOCAL;
  else if (esl_opt_GetBoolean(go, "--glocal"))    mode = p7_GLOCAL;
  else if (esl_opt_GetBoolean(go, "--uniglocal")) mode = p7_UNIGLOCAL;

  /* Allocate the working objects. */
  sq = esl_sq_CreateDigital(hmm->abc);
  if (sq == NULL) esl_fatal("failed to allocate sequence");

  tr = p7_trace_Create();
  if (tr == NULL) esl_fatal("failed to allocate trace");

  bg = p7_bg_Create(hmm->abc);
  if (bg == NULL) esl_fatal("failed to create null model");

  gm = p7_profile_Create(hmm->M, hmm->abc);
  if (gm == NULL) esl_fatal("failed to create profile");

  /* Configure and sanity-check the models. */
  if (p7_ProfileConfig(hmm, bg, gm, len, mode) != eslOK) esl_fatal("failed to configure profile");
  if (p7_bg_SetLength(bg, len)                 != eslOK) esl_fatal("failed to reconfig null model length");
  if (p7_hmm_Validate(hmm, NULL, 0.0001)       != eslOK) esl_fatal("whoops, HMM is bad!");
  if (p7_profile_Validate(gm, NULL, 0.0001)    != eslOK) esl_fatal("whoops, profile is bad!");

  /* Emit, name, and write each sample; sq and tr are reused between samples. */
  for (k = 1; k <= nsample; k++)
    {
      status = from_profile ? p7_ProfileEmit(r, hmm, gm, bg, sq, tr)
                            : p7_CoreEmit   (r, hmm, sq, tr);
      if (status) esl_fatal("Failed to emit sequence\n");

      status = esl_sq_FormatName(sq, "%s-sample%d", hmm->name, k);
      if (status) esl_fatal("Failed to set sequence name\n");

      status = esl_sqio_Write(ofp, sq, outfmt, FALSE);
      if (status != eslOK) esl_fatal("Failed to write sequence\n");

      p7_trace_Reuse(tr);
      esl_sq_Reuse(sq);
    }

  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_bg_Destroy(bg);
  p7_profile_Destroy(gm);
  return;
}
/* seq_generation()
 *
 * Generating sequences.
 *
 * Writes <-N> i.i.d. random sequences of length <-L> to <ofp> in
 * format <outfmt>. The alphabet is chosen by --rna, --dna or --amino.
 * Residue frequencies are uniform (0.25 each) for RNA/DNA, taken from
 * esl_composition_SW34() for amino acids, and uniform over <abc->K>
 * otherwise. Sequences are named "random<i>" when N > 1, else "random".
 *
 * Returns <eslOK> on success, or the failing status if the frequency
 * array cannot be allocated. A non-positive -L, a failure to create
 * the alphabet or the sequence object, or a failed write is fatal.
 */
static int
seq_generation(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  ESL_ALPHABET *abc       = NULL;
  ESL_SQ       *sq        = NULL;
  double       *fq        = NULL;
  int           alphatype = eslUNKNOWN; // static checkers can't see that 1 of --rna, --dna, --amino must be true
  int           N         = esl_opt_GetInteger(go, "-N");
  int           L         = esl_opt_GetInteger(go, "-L");
  int           i;
  int           status;

  if (L <= 0) esl_fatal("To generate sequences, set -L option (length of generated seqs) > 0 ");
  if (esl_opt_GetBoolean(go, "--rna"))   alphatype = eslRNA;
  if (esl_opt_GetBoolean(go, "--dna"))   alphatype = eslDNA;
  if (esl_opt_GetBoolean(go, "--amino")) alphatype = eslAMINO;

  /* Check each creation step: <abc> and <sq> are dereferenced below, so a
   * NULL from either must not be allowed to slip through.
   */
  if ((abc = esl_alphabet_Create(alphatype)) == NULL) esl_fatal("failed to create alphabet");
  if ((sq  = esl_sq_CreateDigital(abc))      == NULL) esl_fatal("failed to allocate sequence");
  if (esl_sq_GrowTo(sq, L) != eslOK)                  esl_fatal("failed to allocate sequence");

  /* Pick the iid frequency distribution to use */
  ESL_ALLOC(fq, sizeof(double) * abc->K);
  switch (alphatype) {
  case eslRNA:
  case eslDNA:   esl_vec_DSet(fq, 4, 0.25);                          break;
  case eslAMINO: esl_composition_SW34(fq);                           break;
  default:       esl_vec_DSet(fq, abc->K, 1.0 / (double) abc->K);    break;
  }

  /* generate */
  for (i = 0; i < N; i++)
    {
      esl_rsq_xIID(r, fq, abc->K, L, sq->dsq);
      if (N > 1) esl_sq_FormatName(sq, "random%d", i);
      else       esl_sq_SetName(sq, "random");
      sq->n = L;
      if (esl_sqio_Write(ofp, sq, outfmt, FALSE) != eslOK) esl_fatal("Failed to write sequence\n");
    }

  free(fq);
  esl_alphabet_Destroy(abc);
  esl_sq_Destroy(sq);
  return eslOK;

 ERROR:
  free(fq);
  esl_alphabet_Destroy(abc);
  esl_sq_Destroy(sq);
  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_STOPWATCH *w = esl_stopwatch_Create(); int N = esl_opt_GetInteger(go, "-N"); int i; float *A, *B, *C; p7_FLogsumInit(); /* Create the problem: sample N values A,B on interval -1000,1000: about the range of H3 scores */ A = malloc(sizeof(float) * N); B = malloc(sizeof(float) * N); C = malloc(sizeof(float) * N); for (i = 0; i < N; i++) { A[i] = esl_random(r) * 2000. - 1000.; B[i] = esl_random(r) * 2000. - 1000.; } /* Run */ esl_stopwatch_Start(w); if (esl_opt_GetBoolean(go, "-n")) { for (i = 0; i < N; i++) C[i] = naive2(A[i], B[i]); } else if (esl_opt_GetBoolean(go, "-r")) { for (i = 0; i < N; i++) C[i] = naive1(A[i], B[i]); } else { for (i = 0; i < N; i++) C[i] = p7_FLogsum(A[i], B[i]); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = NULL; int stalling = esl_opt_GetBoolean(go, "--stall"); int my_rank; int nproc; /* The startup sequence below is designed in part to facilitate debugging. * To debug an MPI program, you start it with 'mpirun' as usual, but * with the --stall flag; this causes all processes to start, but then * wait for the developer to attach gdb's to each running process. * Example: * mpirun -n 2 ./p7_hmm_mpi_utest --stall * [pid's <pid0> and <pid1> are reported for processes 0 and 1] * in one terminal window: * gdb ./p7_hmm_mpi_utest <pid0> * [set desired breakpoint in the master] * set stalling=0 * c * and similarly in a second terminal window, for worker <pid1>. * * Additionally, we want to show the rng seed. This is so we can diagnose * failures that are rare; we can re-run the unit test until we see it fail, * get the rng seed, then reproduce exactly that failure. */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (my_rank == 0) { rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); fprintf(stderr, "# MPI nproc = %d\n", nproc); } #ifdef HAVE_GETPID /* To debug an MPI program, you attach gdb's to each process, already running; * for this, you need the pid of each process. Provide the pid's to help the * developer. */ fprintf(stderr, "# %6d = %d\n", my_rank, getpid()); #endif /* This infinite loop waits on the developer to attach gdb's to each process. * In each gdb, developer may set a better breakpoint, then does "set stalling=0" * and "continue" to release the waiting process from this stall point. 
*/ while (stalling); utest_SendRecv(rng, my_rank, nproc); if (my_rank == 0) { fprintf(stderr, "# status = ok\n"); esl_randomness_Destroy(rng); } MPI_Finalize(); esl_getopts_Destroy(go); exit(0); /* success */ }
/* Function:  p7_CreateDefaultApp()
 * Synopsis:  Initialize a small/simple/standard HMMER application
 * Incept:    SRE, Thu Oct 28 15:03:21 2010 [Janelia]
 *
 * Purpose:   Identical to <esl_getopts_CreateDefaultApp()>, but
 *            specialized for HMMER. See documentation in
 *            <easel/esl_getopts.c>.
 *
 * Args:      options - array of <ESL_OPTIONS> structures for getopts
 *            nargs   - number of cmd line arguments expected (excl. of cmdname)
 *            argc    - <argc> from main()
 *            argv    - <argv> from main()
 *            banner  - optional one-line description of program (or NULL)
 *            usage   - optional one-line usage hint (or NULL)
 *
 * Returns:   ptr to new <ESL_GETOPTS> object.
 *
 *            On command line errors (unparseable or inconsistent
 *            options, or a number of arguments other than <nargs>),
 *            this routine prints an error message to <stdout> then
 *            calls <exit(1)> to halt execution with abnormal (1) status.
 *
 *            If the standard <-h> option is seen, the routine prints
 *            the help page (using the data in the <options> structure),
 *            then calls <exit(0)> to exit with normal (0) status.
 *
 * Xref:      J7/3
 *
 * Note:      The only difference between this and esl_getopts_CreateDefaultApp()
 *            is to call p7_banner() instead of esl_banner(), to get HMMER
 *            versioning info into the header. There ought to be a better way
 *            (perhaps using PACKAGE_* define's instead of HMMER_* vs. EASEL_*
 *            define's in esl_banner(), thus removing the need for p7_banner).
 */
ESL_GETOPTS *
p7_CreateDefaultApp(ESL_OPTIONS *options, int nargs, int argc, char **argv, char *banner, char *usage)
{
  ESL_GETOPTS *go = NULL;

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK ||
      esl_opt_VerifyConfig(go)               != eslOK)
    {
      printf("Failed to parse command line: %s\n", go->errbuf);
      if (usage != NULL) esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  if (esl_opt_GetBoolean(go, "-h") == TRUE)
    {
      if (banner != NULL) p7_banner(stdout, argv[0], banner);
      if (usage  != NULL) esl_usage (stdout, argv[0], usage);
      puts("\nOptions:");
      esl_opt_DisplayHelp(stdout, go, 0, 2, 80);
      exit(0);
    }

  if (esl_opt_ArgNumber(go) != nargs)
    {
      puts("Incorrect number of command line arguments.");
      /* <usage> is optional: guard it here as in the branches above. */
      if (usage != NULL) esl_usage(stdout, argv[0], usage);
      printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
      exit(1);
    }
  return go;
}
/* onefetch_subseq():
 * Fetch the subsequence <given_start>..<given_end> of the sequence
 * named <key> from <sqfp> and write it to <ofp> in FASTA format.
 *
 * Requires an SSI index on <sqfp>; dies with a fatal error otherwise.
 *
 * If <given_end> is nonzero and <given_start> > <given_end>, the
 * coordinates are swapped for the fetch and the fetched sequence is
 * reverse complemented. <given_end> == 0 is passed through to the
 * fetch unchanged and never triggers the swap.
 *
 * The output sequence is named <newname> if that is non-NULL, else
 * "<key>/<given_start>-<end>", where <end> is <given_end>, or
 * <sq->L> when <given_end> is 0.
 *
 * The -r option requests an additional reverse complement.
 */
static void
onefetch_subseq(ESL_GETOPTS *go, FILE *ofp, ESL_SQFILE *sqfp, char *newname, char *key, uint32_t given_start, uint32_t given_end)
{
  int     start, end;
  int     do_revcomp;
  ESL_SQ *sq = esl_sq_Create();

  if (sq == NULL)                   esl_fatal("failed to allocate sequence object");
  if (sqfp->data.ascii.ssi == NULL) esl_fatal("no ssi index");

  /* reverse complement indicated by coords. */
  /* -c 52: would be 52,0, so watch out for given_end = 0 case */
  if (given_end != 0 && given_start > given_end)
    { start = given_end;   end = given_start; do_revcomp = TRUE;  }
  else
    { start = given_start; end = given_end;   do_revcomp = FALSE; }

  /* The error buffer is data, not a format: never pass it as the format string. */
  if (esl_sqio_FetchSubseq(sqfp, key, start, end, sq) != eslOK)
    esl_fatal("%s", esl_sqfile_GetErrorBuf(sqfp));

  /* Explicit (int) casts make the variadic arguments match the %d conversions. */
  if (newname != NULL) esl_sq_SetName(sq, newname);
  else                 esl_sq_FormatName(sq, "%s/%d-%d", key, (int) given_start,
                                         (given_end == 0) ? (int) sq->L : (int) given_end);

  /* Two ways we might have been asked to revcomp: by coord, or by -r option */
  /* (If both happen, they'll cancel each other out) */
  if (do_revcomp)
    if (esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name);
  if (esl_opt_GetBoolean(go, "-r"))
    if (esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name);

  esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE);
  esl_sq_Destroy(sq);
}
/* onefetch():
 * Given one <key> (a seq name or accession), retrieve the corresponding
 * sequence from <sqfp> and write it to <ofp>.
 *
 * With an SSI index, the file is positioned directly at the record by
 * key, the record is read, and its name/accession is checked against
 * <key>. Without an SSI index, the file is read sequentially until a
 * record whose name or accession matches <key> is found.
 *
 * If the sequence is not being changed (no -r, no -n) and the file is
 * not an alignment format, the record is echoed; otherwise the parsed
 * sequence is (optionally) reverse complemented and renamed, then
 * written in FASTA format.
 *
 * All failures are fatal.
 */
static void
onefetch(ESL_GETOPTS *go, FILE *ofp, char *key, ESL_SQFILE *sqfp)
{
  ESL_SQ *sq         = esl_sq_Create();
  int     do_revcomp = esl_opt_GetBoolean(go, "-r");
  char   *newname    = esl_opt_GetString(go, "-n");
  int     status;

  if (sq == NULL) esl_fatal("failed to allocate sequence object");

  /* Try to position the file at the desired sequence with SSI. */
  if (sqfp->data.ascii.ssi != NULL)
    {
      status = esl_sqfile_PositionByKey(sqfp, key);
      if      (status == eslENOTFOUND) esl_fatal("seq %s not found in SSI index for file %s\n", key, sqfp->filename);
      else if (status == eslEFORMAT)   esl_fatal("Failed to parse SSI index for %s\n", sqfp->filename);
      else if (status != eslOK)        esl_fatal("Failed to look up location of seq %s in SSI index of file %s\n", key, sqfp->filename);

      status = esl_sqio_Read(sqfp, sq);
      if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
      /* Only the file name goes with the single %s here. */
      else if (status == eslEOF)     esl_fatal("Unexpected EOF reading sequence file %s", sqfp->filename);
      else if (status != eslOK)      esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename);

      if (strcmp(key, sq->name) != 0 && strcmp(key, sq->acc) != 0)
        esl_fatal("whoa, internal error; found the wrong sequence %s, not %s", sq->name, key);
    }
  else
    { /* Else, we have to read the whole damn file sequentially until we find the seq */
      while ((status = esl_sqio_Read(sqfp, sq)) != eslEOF)
        {
          if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
          else if (status != eslOK)      esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename);

          if (strcmp(key, sq->name) == 0 || strcmp(key, sq->acc) == 0) break;
          esl_sq_Reuse(sq);
        }
      if (status == eslEOF) esl_fatal("Failed to find sequence %s in file %s\n", key, sqfp->filename);
    }

  if (do_revcomp == FALSE && newname == NULL && ! esl_sqio_IsAlignment(sqfp->format))
    { /* If we're not manipulating the sequence in any way, and it's not from an alignment file, we can Echo() it. */
      if (esl_sqio_Echo(sqfp, sq, ofp) != eslOK) esl_fatal("Echo failed: %s\n", esl_sqfile_GetErrorBuf(sqfp));
    }
  else
    { /* Otherwise we Write() the parsed version. */
      if (do_revcomp && esl_sq_ReverseComplement(sq) != eslOK) esl_fatal("Failed to reverse complement %s; is it a protein?\n", sq->name);
      if (newname != NULL) esl_sq_SetName(sq, newname);
      esl_sqio_Write(ofp, sq, eslSQFILE_FASTA, FALSE);
    }

  esl_sq_Destroy(sq);
}
/* init_master_cfg()
 * Called by masters, mpi or serial.
 * Already set:
 *    cfg->hmmfile     - command line arg 1
 *    cfg->alifile     - command line arg 2
 *    cfg->postmsafile - option -O (default NULL)
 *    cfg->fmt         - format of alignment file
 * Sets:
 *    cfg->ofp         - open output file (option -o), or stdout
 *    cfg->afp         - open alignment file
 *    cfg->abc         - digital alphabet
 *    cfg->hmmfp       - open HMM file
 *    cfg->postmsafp   - open MSA resave file, or NULL
 *
 * Errors in the MPI master here are considered to be "recoverable",
 * in the sense that we'll try to delay output of the error message
 * until we've cleanly shut down the worker processes. Therefore
 * errors return (code, errmsg) by the ESL_FAIL mech.
 *
 * Returns <eslOK> on success. On failure, returns a non-OK status
 * code with an explanation left in <errmsg>.
 */
static int
init_master_cfg(const ESL_GETOPTS *go, struct cfg_s *cfg, char *errmsg)
{
  int status;

  if (esl_opt_GetString(go, "-o") != NULL) {
    if ((cfg->ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL)
      ESL_FAIL(eslFAIL, errmsg, "Failed to open -o output file %s\n", esl_opt_GetString(go, "-o"));
  } else cfg->ofp = stdout;

  status = esl_msafile_Open(cfg->alifile, cfg->fmt, NULL, &(cfg->afp));
  if      (status == eslENOTFOUND) ESL_FAIL(status, errmsg, "Alignment file %s doesn't exist or is not readable\n", cfg->alifile);
  else if (status == eslEFORMAT)   ESL_FAIL(status, errmsg, "Couldn't determine format of alignment %s\n", cfg->alifile);
  else if (status != eslOK)        ESL_FAIL(status, errmsg, "Alignment file open failed with error %d\n", status);

  if      (esl_opt_GetBoolean(go, "--amino")) cfg->abc = esl_alphabet_Create(eslAMINO);
  else if (esl_opt_GetBoolean(go, "--dna"))   cfg->abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--rna"))   cfg->abc = esl_alphabet_Create(eslRNA);
  else {
    /* No alphabet was asserted on the command line: guess it from the alignment file. */
    int type;
    status = esl_msafile_GuessAlphabet(cfg->afp, &type);
    if      (status == eslEAMBIGUOUS) ESL_FAIL(status, errmsg, "Failed to guess the bio alphabet used in %s.\nUse --dna, --rna, or --amino option to specify it.", cfg->alifile);
    else if (status == eslEFORMAT)    ESL_FAIL(status, errmsg, "Alignment file parse failed: %s\n", cfg->afp->errbuf);
    else if (status == eslENODATA)    ESL_FAIL(status, errmsg, "Alignment file %s is empty\n", cfg->alifile);
    else if (status != eslOK)         ESL_FAIL(status, errmsg, "Failed to read alignment file %s\n", cfg->alifile);
    cfg->abc = esl_alphabet_Create(type);
  }
  esl_msafile_SetDigital(cfg->afp, cfg->abc);

  /* <status> is eslOK by this point (every earlier failure has already
   * returned), so fopen() failures must report an explicit error code;
   * otherwise the caller would see the failure as success.
   */
  if ((cfg->hmmfp = fopen(cfg->hmmfile, "w")) == NULL)
    ESL_FAIL(eslFAIL, errmsg, "Failed to open HMM file %s for writing", cfg->hmmfile);

  if (cfg->postmsafile != NULL) {
    if ((cfg->postmsafp = fopen(cfg->postmsafile, "w")) == NULL)
      ESL_FAIL(eslFAIL, errmsg, "Failed to open MSA resave file %s for writing", cfg->postmsafile);
  } else cfg->postmsafp = NULL;

  output_header(go, cfg);

  /* with msa == NULL, output_result() prints the tabular results header, if needed */
  output_result(cfg, errmsg, 0, NULL, NULL, NULL, 0.0);
  return eslOK;
}
/* serial_master()
 * Serial (non-MPI) driver: initialize the master configuration, then
 * read HMMs one at a time from <cfg->hfp> until EOF. For each HMM:
 * create the background model on first use (uniform if --bgflat is
 * set; length set from -L), optionally recalibrate the model
 * (--recal), run the work unit to fill the score array, and output
 * the result.
 *
 * Any failure is fatal, via p7_Fail().
 */
static void
serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
{
  P7_HMM *hmm = NULL;
  double *xv  = NULL;   /* results: array of N scores */
  int    *av  = NULL;   /* optional results: array of N alignment lengths */
  char    errbuf[eslERRBUFSIZE];
  int     status;

  /* <errbuf> can contain user-supplied file names, so it is always passed
   * as data to a "%s" format, never as the format string itself.
   */
  if ((status = init_master_cfg(go, cfg, errbuf)) != eslOK) p7_Fail("%s", errbuf);

  if ((xv = malloc(sizeof(double) * cfg->N)) == NULL) p7_Fail("allocation failed");
  if (esl_opt_GetBoolean(go, "-a") && (av = malloc(sizeof(int) * cfg->N)) == NULL) p7_Fail("allocation failed");

  while ((status = p7_hmmfile_Read(cfg->hfp, &(cfg->abc), &hmm)) != eslEOF)
    {
      if      (status == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", cfg->hmmfile);
      else if (status == eslEFORMAT)   p7_Fail("bad file format in HMM file %s", cfg->hmmfile);
      else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", cfg->hmmfile);
      else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s", cfg->hmmfile);

      /* The background model is created once, after the first HMM has been read. */
      if (cfg->bg == NULL)
        {
          if (esl_opt_GetBoolean(go, "--bgflat")) cfg->bg = p7_bg_CreateUniform(cfg->abc);
          else                                    cfg->bg = p7_bg_Create(cfg->abc);
          p7_bg_SetLength(cfg->bg, esl_opt_GetInteger(go, "-L")); /* set the null model background length in both master and workers. */
        }

      if (esl_opt_GetBoolean(go, "--recal"))
        {
          if (recalibrate_model(go, cfg, errbuf, hmm) != eslOK) p7_Fail("%s", errbuf);
        }

      if (process_workunit(go, cfg, errbuf, hmm, xv, av) != eslOK) p7_Fail("%s", errbuf);
      if (output_result   (go, cfg, errbuf, hmm, xv, av) != eslOK) p7_Fail("%s", errbuf);

      p7_hmm_Destroy(hmm);
    }

  free(xv);
  free(av);   /* free(NULL) is a no-op, so no guard is needed */
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_BG *bg = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); /* First round of tests for DNA alphabets. */ if ((abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (esl_opt_GetBoolean(go, "-v")) printf("ViterbiFilter() tests, DNA\n"); utest_viterbi_filter(r, abc, bg, M, L, N); utest_viterbi_filter(r, abc, bg, 1, L, 10); utest_viterbi_filter(r, abc, bg, M, 1, 10); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); /* Second round of tests for amino alphabets. */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (esl_opt_GetBoolean(go, "-v")) printf("ViterbiFilter() tests, protein\n"); utest_viterbi_filter(r, abc, bg, M, L, N); utest_viterbi_filter(r, abc, bg, 1, L, 10); utest_viterbi_filter(r, abc, bg, M, 1, 10); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); esl_getopts_Destroy(go); esl_randomness_Destroy(r); return eslOK; }
/* Test driver for the random number generator: reads its options,
 * creates a generator (time-seeded if -r is set, otherwise seeded
 * from -s), runs the unit tests, and optionally saves a bit file
 * (--bitfile).
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go        = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng       = NULL;
  char           *bitfile   = esl_opt_GetString (go, "--bitfile");
  int             nbins     = esl_opt_GetInteger(go, "-b");
  int             nsamples  = esl_opt_GetInteger(go, "-n");
  int             verbose   = esl_opt_GetBoolean(go, "-v");
  int             seed      = esl_opt_GetInteger(go, "-s");

  rng = esl_opt_GetBoolean(go, "-r") ? esl_randomness_CreateTimeseeded()
                                     : esl_randomness_Create(seed);

  utest_random(seed, nsamples, nbins, verbose);
  utest_choose(rng,  nsamples, nbins, verbose);

  if (bitfile != NULL)
    {
      save_bitfile(bitfile, rng, nsamples);
    }

  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
/* Fetch a random segment of length <L> from the source sequence
 * database (skipped when --iid is set), randomize it by the chosen
 * algorithm, and copy the resulting <L> residues into <dsq>, starting
 * at dsq[0].
 *
 * The randomization is selected by option: --mono, --di (which falls
 * back to the mono shuffle when <L> is below --minDPL), --markov0,
 * --markov1, --reverse, or --iid.
 *
 * If <logfp> is non-NULL and the segment came from the database,
 * append a "<sqname> <from> <to>" field to the current line, to record
 * where the random segment was selected from. This is useful in cases
 * where we want to track back the origin of a high-scoring segment, in
 * case the randomization wasn't good enough to obscure the identity of
 * a segment.
 *
 * Returns <eslOK>. A request for <L> == 0 returns immediately without
 * touching <dsq>. All failures are fatal.
 */
static int
set_random_segment(ESL_GETOPTS *go, struct cfg_s *cfg, FILE *logfp, ESL_DSQ *dsq, int L)
{
  ESL_SQ  *sq           = NULL;
  int      minDPL       = esl_opt_GetInteger(go, "--minDPL");
  int      db_dependent = (esl_opt_GetBoolean(go, "--iid") == TRUE ? FALSE : TRUE);
  char    *pkey         = NULL;
  int      start        = 0;
  int      end          = 0;
  int64_t  Lseq;
  int      status;

  /* Nothing to do for an empty segment. Return before allocating <sq>,
   * so this early exit does not leak it.
   */
  if (L == 0) return eslOK;
  if (L > cfg->db_maxL) esl_fatal("can't fetch a segment of length %d; database max is %d\n", L, cfg->db_maxL);

  if ((sq = esl_sq_CreateDigital(cfg->abc)) == NULL) esl_fatal("failed to allocate sequence object");

  /* fetch a random subseq from the source database */
  esl_sq_GrowTo(sq, L);
  if (db_dependent)
    {
      /* Redraw until we hit a database sequence at least <L> long. */
      do {
        free(pkey);
        pkey = NULL;
        if (esl_ssi_FindNumber(cfg->dbfp->data.ascii.ssi, esl_rnd_Roll(cfg->r, cfg->db_nseq), NULL, NULL, NULL, &Lseq, &pkey) != eslOK)
          esl_fatal("failed to look up a random seq");
      } while (Lseq < L);

      /* NOTE(review): if esl_rnd_Roll(r, a) draws from 0..a-1, the last
       * valid start position (Lseq-L+1) is never chosen. Confirm whether
       * that is intended before changing it; a fix alters sampled output.
       */
      start = 1 + esl_rnd_Roll(cfg->r, Lseq-L);
      end   = start + L - 1;
      if (esl_sqio_FetchSubseq(cfg->dbfp, pkey, start, end, sq) != eslOK) esl_fatal("failed to fetch subseq");
      esl_sq_ConvertDegen2X(sq);
    }

  /* log sequence source info: <name> <start> <end> */
  if (logfp != NULL && db_dependent)
    fprintf(logfp, " %-15s %5d %5d", pkey, start, end);

  /* Now apply the appropriate randomization algorithm */
  if      (esl_opt_GetBoolean(go, "--mono"))    status = esl_rsq_XShuffle  (cfg->r, sq->dsq, L, sq->dsq);
  else if (esl_opt_GetBoolean(go, "--di")) {
    if (L < minDPL)                             status = esl_rsq_XShuffle  (cfg->r, sq->dsq, L, sq->dsq);
    else                                        status = esl_rsq_XShuffleDP(cfg->r, sq->dsq, L, cfg->abc->Kp, sq->dsq);
  }
  else if (esl_opt_GetBoolean(go, "--markov0")) status = esl_rsq_XMarkov0  (cfg->r, sq->dsq, L, cfg->abc->Kp, sq->dsq);
  else if (esl_opt_GetBoolean(go, "--markov1")) status = esl_rsq_XMarkov1  (cfg->r, sq->dsq, L, cfg->abc->Kp, sq->dsq);
  else if (esl_opt_GetBoolean(go, "--reverse")) status = esl_rsq_XReverse  (sq->dsq, L, sq->dsq);
  else if (esl_opt_GetBoolean(go, "--iid"))     status = esl_rsq_xIID      (cfg->r, cfg->fq, cfg->abc->K, L, sq->dsq);
  else                                          status = eslEINCONCEIVABLE;
  if (status != eslOK) esl_fatal("esl's shuffling failed");

  /* Residues in <sq->dsq> start at index 1; copy them out to dsq[0..L-1]. */
  memcpy(dsq, sq->dsq+1, sizeof(ESL_DSQ) * L);
  esl_sq_Destroy(sq);
  free(pkey);
  return eslOK;
}
/* Test driver for the p7_bg unit tests: creates a generator seeded
 * from -s, reports the seed when -v is set, and runs the read/write
 * unit test.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  int             seed    = esl_opt_GetInteger(go, "-s");
  ESL_RANDOMNESS *r       = esl_randomness_CreateFast(seed);
  int             verbose = esl_opt_GetBoolean(go, "-v");

  if (verbose)
    {
      printf("p7_bg unit test: rng seed %" PRIu32 "\n", esl_randomness_GetSeed(r));
    }

  utest_ReadWrite(r);

  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
/* process_commandline()
 * Parse the environment and command line into a new <ESL_GETOPTS>
 * object, and extract the two positional arguments.
 *
 * On success, returns <eslOK> with:
 *    *ret_go     - the options object
 *    *ret_qfile  - command line arg 1
 *    *ret_fmfile - command line arg 2
 *
 * If -h is given, prints the help page and exits with status 0.
 *
 * On a user error (unparseable options, wrong number of arguments,
 * or both files given as '-'), prints a message plus a brief usage
 * summary to stdout and exits with status 1.
 *
 * Failed writes to stdout are reported via ESL_XEXCEPTION_SYS, which
 * is presumed to set <status> and jump to the ERROR label, where the
 * process exits with that status.
 */
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_fmfile, char **ret_qfile)
{
  ESL_GETOPTS *go = esl_getopts_Create(options);
  int          status;

  /* Everything below dereferences <go>; stop here if it could not be created. */
  if (go == NULL)
    {
      puts("Failed to create command line options object.");
      exit(1);
    }

  if (esl_opt_ProcessEnvironment(go) != eslOK) {
    if (printf("Failed to process environment: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) {
    if (printf("Failed to parse command line: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }
  if (esl_opt_VerifyConfig(go) != eslOK) {
    if (printf("Failed to parse command line: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }

  /* help format: */
  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    esl_banner(stdout, argv[0], banner);
    esl_usage(stdout, argv[0], usage);

    if (puts("\nBasic options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/

    if (puts("\nSpecial options:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    esl_opt_DisplayHelp(stdout, go, 2, 2, 80); /* 2= group; 2 = indentation; 80=textwidth*/

    exit(0);
  }

  if (esl_opt_ArgNumber(go) != 2) {
    if (puts("Incorrect number of command line arguments.") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }
  if ((*ret_qfile = esl_opt_GetArg(go, 1)) == NULL) {
    if (puts("Failed to get <qfile> argument on command line") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }
  if ((*ret_fmfile = esl_opt_GetArg(go, 2)) == NULL) {
    if (puts("Failed to get <fmfile> argument on command line") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }

  /* Validate any attempted use of stdin streams */
  if (esl_strcmp(*ret_fmfile, "-") == 0 && esl_strcmp(*ret_qfile, "-") == 0) {
    if (puts("Either <fmfile> or <qfile> may be '-' (to read from stdin), but not both.") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    goto FAILURE;
  }

  *ret_go = go;
  return eslOK;

 FAILURE:  /* all errors handled here are user errors, so be polite. */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 80=textwidth*/
  printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
  esl_getopts_Destroy(go);
  exit(1);

 ERROR:
  if (go) esl_getopts_Destroy(go);
  exit(status);
}
/* process_commandline()
 * Build the options object from the environment and command line,
 * and pull out the two positional arguments.
 *
 * On success, sets:
 *    *ret_go      - the options object
 *    *ret_hmmfile - command line arg 1
 *    *ret_alifile - command line arg 2
 *
 * If -h is given, prints the banner, usage, and each help section,
 * then exits with status 0. On any user error, prints a message and
 * a short usage summary to stdout, then exits with status 1.
 */
static void
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_hmmfile, char **ret_alifile)
{
  /* Help page sections, in display order: heading text and option group. */
  static const struct {
    const char *heading;
    int         group;
  } help_sections[] = {
    { "\nwhere basic options are:",                                 1 },
    { "\nOptions for selecting alphabet rather than guessing it:",  2 },
    { "\nAlternative model construction strategies:",               3 },
    { "\nAlternative relative sequence weighting strategies:",      4 },
    { "\nAlternate effective sequence weighting strategies:",       5 },
    { "\nControl of E-value calibration:",                          6 },
    { "\nOther options:",                                           8 },
  };
  ESL_GETOPTS *opts       = esl_getopts_Create(options);
  int          user_error = 0;
  size_t       s;

  if (opts == NULL) p7_Die("problem with options structure");

  /* Each step runs only if all the previous ones succeeded. */
  if (esl_opt_ProcessEnvironment(opts) != eslOK)
    {
      printf("Failed to process environment: %s\n", opts->errbuf);
      user_error = 1;
    }
  else if (esl_opt_ProcessCmdline(opts, argc, argv) != eslOK)
    {
      printf("Failed to parse command line: %s\n", opts->errbuf);
      user_error = 1;
    }
  else if (esl_opt_VerifyConfig(opts) != eslOK)
    {
      printf("Failed to parse command line: %s\n", opts->errbuf);
      user_error = 1;
    }
  else if (esl_opt_GetBoolean(opts, "-h") == TRUE)
    {
      /* help format: */
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);
      for (s = 0; s < sizeof(help_sections) / sizeof(help_sections[0]); s++)
        {
          puts(help_sections[s].heading);
          esl_opt_DisplayHelp(stdout, opts, help_sections[s].group, 2, 80);
        }
      exit(0);
    }
  else if (esl_opt_ArgNumber(opts) != 2)
    {
      puts("Incorrect number of command line arguments.");
      user_error = 1;
    }
  else if ((*ret_hmmfile = esl_opt_GetArg(opts, 1)) == NULL)
    {
      puts("Failed to get <hmmfile> argument on command line");
      user_error = 1;
    }
  else if ((*ret_alifile = esl_opt_GetArg(opts, 2)) == NULL)
    {
      puts("Failed to get <alifile> argument on command line");
      user_error = 1;
    }

  if (user_error)
    {
      /* all errors handled here are user errors, so be polite. */
      esl_usage(stdout, argv[0], usage);
      puts("\nwhere basic options are:");
      esl_opt_DisplayHelp(stdout, opts, 1, 2, 80);
      printf("\nTo see more help on other available options, do %s -h\n\n", argv[0]);
      exit(1);
    }

  *ret_go = opts;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); int be_verbose = esl_opt_GetBoolean(go, "-v"); int N = esl_opt_GetInteger(go, "-N"); if (be_verbose) printf("seed = %" PRIu32 "\n", esl_randomness_GetSeed(r)); utest_LogGamma(r, N, be_verbose); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
/* MPI test driver: optionally stalls (--stall) so a debugger can be
 * attached, then initializes MPI, looks up this process's rank and
 * the communicator size, runs the oprofile send/receive unit test,
 * and finalizes MPI.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  int          rank;
  int          nprocs;

  /* For debugging: stall until GDB can be attached */
  if (esl_opt_GetBoolean(go, "--stall"))
    {
      pause();
    }

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  utest_oprofileSendRecv(rank, nprocs);

  MPI_Finalize();
  return 0;
}
/* Forward is hard to validate. * We do know that the Forward score is >= Viterbi. * We also know that the expected score on random seqs is <= 0 (not * exactly - we'd have to sample the random length from the background * model too, not just use a fixed L - but it's close enough to * being true to be a useful test.) */ static void utest_forward(ESL_GETOPTS *go, ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, P7_PROFILE *gm, int nseq, int L) { float avg_sc; ESL_DSQ *dsq = NULL; P7_GMX *fwd = NULL; P7_GMX *bck = NULL; int idx; float fsc, bsc; float vsc, nullsc; if ((dsq = malloc(sizeof(ESL_DSQ) *(L+2))) == NULL) esl_fatal("malloc failed"); if ((fwd = p7_gmx_Create(gm->M, L)) == NULL) esl_fatal("matrix creation failed"); if ((bck = p7_gmx_Create(gm->M, L)) == NULL) esl_fatal("matrix creation failed"); avg_sc = 0.; for (idx = 0; idx < nseq; idx++) { if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal("seq generation failed"); if (p7_GViterbi(dsq, L, gm, fwd, &vsc) != eslOK) esl_fatal("viterbi failed"); if (p7_GForward(dsq, L, gm, fwd, &fsc) != eslOK) esl_fatal("forward failed"); if (p7_GBackward(dsq, L, gm, bck, &bsc) != eslOK) esl_fatal("backward failed"); if (fsc < vsc) esl_fatal("Foward score can't be less than Viterbi score"); if (fabs(fsc-bsc) > 0.001) esl_fatal("Forward/Backward failed: %f %f\n", fsc, bsc); if (p7_bg_NullOne(bg, dsq, L, &nullsc) != eslOK) esl_fatal("null score failed"); avg_sc += fsc - nullsc; if (esl_opt_GetBoolean(go, "--vv")) printf("utest_forward: Forward score: %.4f (total so far: %.4f)\n", fsc, avg_sc); } avg_sc /= (float) nseq; if (avg_sc > 0.) esl_fatal("Forward scores have positive expectation (%f nats)", avg_sc); p7_gmx_Destroy(fwd); p7_gmx_Destroy(bck); free(dsq); return; }