int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); P7_COORDS2 *c2 = p7_coords2_Create(0, 0); P7_COORDS2_HASH *hash = p7_coords2_hash_Create(0, 0, 0); int L = 20; int maxseg = 1; int nsamples = 1000; int32_t *wrk = NULL; int32_t keyidx; int i; for (i = 0; i < nsamples; i++) { p7_coords2_Sample(rng, c2, maxseg, L, &wrk); p7_coords2_hash_Store(hash, c2, &keyidx); p7_coords2_Reuse(c2); } p7_coords2_hash_Dump(stdout, hash); if (wrk) free(wrk); p7_coords2_hash_Destroy(hash); p7_coords2_Destroy(c2); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); P7_ANCHORS *anch = p7_anchors_Create(); P7_ANCHORHASH *ah = p7_anchorhash_Create(); int L = 400; int M = 200; int maxD = 50; int nsamples = 1000; int32_t keyidx; int s; for (s = 0; s < nsamples; s++) { p7_anchors_Sample(rng, L, M, maxD, anch); p7_anchorhash_Store(ah, anch, 0, &keyidx); p7_anchors_Reuse(anch); } p7_anchorhash_Dump(stdout, ah); p7_anchorhash_Destroy(ah); p7_anchors_Destroy(anch); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; ESL_RANDOMNESS *r = NULL; int be_verbose; go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { esl_usage(stdout, argv[0], usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ exit(0); } if (esl_opt_ArgNumber(go) != 0) { printf("Incorrect number of command line arguments.\n"); esl_usage(stdout, argv[0], usage); exit(1); } be_verbose = esl_opt_GetBoolean(go, "-v"); if (esl_opt_GetBoolean(go, "-r")) { r = esl_randomness_CreateTimeseeded(); if (be_verbose) printf("seed = %ld\n", esl_randomness_GetSeed(r)); } else r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* application configuration */ ESL_RANDOMNESS *r = NULL; /* random number generator */ FILE *ofp = NULL; /* data output stream */ int outfmt = eslSQFILE_FASTA; /* Parse command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) cmdline_failure(argv[0], "Error in app configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") ) cmdline_help(argv[0], go); /* Open the output data file, if any */ if (esl_opt_GetString(go, "-o") != NULL) { if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL) esl_fatal("Failed to open output file %s\n", esl_opt_GetString(go, "-o")); } else ofp = stdout; /* Initialize */ r = esl_randomness_Create(esl_opt_GetInteger(go, "--seed")); /* Hand off execution to one of the three modes */ if (esl_opt_GetBoolean(go, "-A")) /* Alignment shuffling */ { if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); msa_shuffling(go, r, ofp, outfmt); } else if (esl_opt_GetBoolean(go, "-G")) /* Sequence generation */ { if (esl_opt_ArgNumber(go) != 0) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); seq_generation(go, r, ofp, outfmt); } else if (esl_opt_GetBoolean(go, "-S")) /* Sequence shuffling */ { if (esl_opt_ArgNumber(go) != 1) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); seq_shuffling(go, r, ofp, outfmt); } if (esl_opt_GetString(go, "-o") != NULL) fclose(ofp); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* Example: print ten values returned by esl_random(), one per line,
 * from a generator created with seed 42.
 */
int
main(void)
{
  ESL_RANDOMNESS *rng   = esl_randomness_Create(42);
  int             count = 10;
  int             k;

  printf("A sequence of %d pseudorandom numbers:\n", count);
  for (k = 0; k < count; k++)
    printf("%f\n", esl_random(rng));

  esl_randomness_Destroy(rng);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(0); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESLX_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; int textmode = esl_opt_GetBoolean(go, "--text"); int nali = 0; int status; /* If you know the alphabet you want, create it - you'll pass it to eslx_msafile_Open() */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* Open in text or digital mode. * To let the Open() function autoguess the format, you pass <infmt=eslMSAFILE_UNKNOWN>. * To let it autoguess the alphabet, you set <abc=NULL> and pass <&abc>. * To open in text mode instead of digital, you pass <NULL> for the alphabet argument. * eslx_msafile_OpenFailure() is a convenience, printing various diagnostics of any * open failure to <stderr>. You can of course handle your own diagnostics instead. */ if (textmode) status = eslx_msafile_Open(NULL, msafile, NULL, fmt, NULL, &afp); else status = eslx_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp); if (status != eslOK) eslx_msafile_OpenFailure(afp, status); fmt = afp->format; while ((status = eslx_msafile_Read(afp, &msa)) == eslOK) { /* if digital MSA: msa->ax[idx=0..nseq-1][acol=1..alen] is the alignment data; * if text MSA: msa->aseq[idx=0..nseq-1][acol=0..alen-1] */ nali++; /* permute it */ esl_msashuffle_PermuteSequenceOrder(rng, msa); eslx_msafile_Write(stdout, msa, fmt); esl_msa_Destroy(msa); } if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); /* a convenience, like eslx_msafile_OpenFailure() */ esl_alphabet_Destroy(abc); eslx_msafile_Close(afp); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); exit(0); }
/* Test driver: runs utest_logf(), utest_expf() and utest_odds() in
 * that order. Only utest_odds() receives the RNG, which is seeded
 * from the "-s" option.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go;
  ESL_RANDOMNESS *rng;

  go  = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));

  utest_logf(go);
  utest_expf(go);
  utest_odds(go, rng);

  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_RANDOMNESS *r; /* source of random numbers */ ESL_HISTOGRAM *h; /* histogram to store the data */ ESL_HYPEREXP *hxp; /* hyperexponential to sample from */ ESL_HYPEREXP *ehxp; /* estimated hyperexponential */ double x; /* sampled data point */ int n = 100000; /* number of samples */ double *data; int ndata; int i; hxp = esl_hyperexp_Create(3); hxp->mu = -2.0; hxp->q[0] = 0.6; hxp->q[1] = 0.3; hxp->q[2] = 0.1; hxp->lambda[0] = 1.0; hxp->lambda[1] = 0.3; hxp->lambda[2] = 0.1; r = esl_randomness_Create(0); h = esl_histogram_CreateFull(hxp->mu, 100, 1.0); for (i = 0; i < n; i++) { x = esl_hxp_Sample(r, hxp); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); /* Plot the empirical (sampled) and expected survivals */ esl_histogram_PlotSurvival(stdout, h); esl_hxp_Plot(stdout, hxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1); /* ML fit to complete data, and plot fitted survival curve */ ehxp = esl_hyperexp_Create(3); esl_hxp_FitGuess(data, ndata, ehxp); esl_hxp_FitComplete(data, ndata, ehxp); esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1); /* ML fit to binned data, plot fitted survival curve */ esl_hxp_FitGuessBinned(h, ehxp); esl_hxp_FitCompleteBinned(h, ehxp); esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1); esl_randomness_Destroy(r); esl_histogram_Destroy(h); esl_hyperexp_Destroy(hxp); esl_hyperexp_Destroy(ehxp); return 0; }
/* stats_fittest() * Samples <n> numbers from a GEV w/ parameters <mu>, <lambda>, <alpha>; * then fits to a GEV and print info about how good the fit is. * * Repeat this <ntrials> times. * * For each trial, outputs a line to <fp>: * <trial> <nll> <est_nll> <est_mu> <mu %error> <est_lambda> <%err>\ * <est_alpha> <%err> <est E-val at parametric E=1> * * Each sampled set is done with the random number generator seeded to * the trial number. This should make each set reproducible and * identical to the sets used to test R's fitting. * * xref STL9/191; xref 2005/0718-weibull-debugging */ static int stats_fittest(FILE *fp, int ntrials, int n, double mu, double lambda, double alpha) { ESL_RANDOMNESS *r = NULL; double *x = NULL; int i; int trial; double est_mu, est_lambda, est_alpha; double z; double nll, est_nll; int status; ESL_ALLOC(x, sizeof(double) * n); for (trial = 1; trial <= ntrials; trial++) { r = esl_randomness_Create(trial); nll = 0.; for (i = 0; i < n; i++) { x[i] = esl_gev_Sample(r, mu, lambda, alpha); nll -= esl_gev_logpdf(x[i], mu, lambda, alpha); } esl_randomness_Destroy(r); esl_gev_FitComplete(x, n, &est_mu, &est_lambda, &est_alpha); est_nll = 0.; for (i = 0; i < n; i++) est_nll -= esl_gev_logpdf(x[i], est_mu, est_lambda, est_alpha); z = mu + (exp(-alpha*log(1/(double)n)) - 1 ) / (alpha*lambda);/* x at E=1*/ z = (double) n * esl_gev_surv(z, est_mu, est_lambda, est_alpha); /* E at x */ printf("%4d %10.2f %10.2f %8.3f %8.3f %8.5f %8.3f %8.5f %8.3f %6.4f\n", trial, nll, est_nll, est_mu, 100* fabs((est_mu-mu)/mu), est_lambda, 100* fabs((est_lambda-lambda)/lambda), est_alpha, 100* fabs((est_alpha-alpha)/alpha), z); } free(x); return eslOK; ERROR: return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); int be_verbose = esl_opt_GetBoolean(go, "-v"); int N = esl_opt_GetInteger(go, "-N"); if (be_verbose) printf("seed = %" PRIu32 "\n", esl_randomness_GetSeed(r)); utest_LogGamma(r, N, be_verbose); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
/* Function:  esl_dst_XAverageId()
 * Synopsis:  Calculate avg identity for digital MSA
 * Incept:    SRE, Fri May 18 15:19:14 2007 [Janelia]
 *
 * Purpose:   Calculates the average pairwise fractional identity in
 *            a digital multiple sequence alignment <ax>, consisting of <N>
 *            aligned digital sequences of identical length.
 *
 *            If an exhaustive calculation would require more than
 *            <max_comparisons> pairwise comparisons, then instead of
 *            looking at all pairs, calculate the average over a
 *            stochastic sample of <max_comparisons> random pairs.
 *            This allows the routine to work efficiently even on very
 *            deep MSAs.
 *
 *            Each fractional pairwise identity (range $[0..$ pid $..1]$)
 *            is calculated using <esl_dst_XPairId()>.
 *
 *            If <N> is 0 or 1, the average is defined as 1.0.
 *
 * Returns:   <eslOK> on success, and <*ret_id> contains the average
 *            fractional identity.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEINVAL> if any of the aligned sequence pairs aren't
 *            of the same length.
 *            In either case, <*ret_id> is set to 0.
 */
int
esl_dst_XAverageId(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, int max_comparisons, double *ret_id)
{
  int       status;
  double    id;
  double    sum = 0.;
  int       i,j,n;
  long long npairs;   /* N choose 2, in a type wide enough that it cannot overflow */

  if (N <= 1) { *ret_id = 1.; return eslOK; }
  *ret_id = 0.;

  npairs = (long long) N * (N-1) / 2;

  /* Is N small enough that we can average over all pairwise comparisons?
   * Watch out for numerical overflow in this: Pfam N's easily overflow when squared.
   */
  if (N <= max_comparisons && N <= sqrt(2. * max_comparisons) && npairs <= max_comparisons)
    {
      for (i = 0; i < N; i++)
        for (j = i+1; j < N; j++)
          {
            if ((status = esl_dst_XPairId(abc, ax[i], ax[j], &id, NULL, NULL)) != eslOK) return status;
            sum += id;
          }
      sum /= (double) npairs;
    }
  /* If nseq is large, calculate average over a stochastic sample. */
  else
    {
      ESL_RANDOMNESS *r = esl_randomness_Create(0);

      if (r == NULL) return eslEMEM;

      for (n = 0; n < max_comparisons; n++)
        {
          do { i = esl_rnd_Roll(r, N); j = esl_rnd_Roll(r, N); } while (j == i); /* make sure j != i */
          status = esl_dst_XPairId(abc, ax[i], ax[j], &id, NULL, NULL);
          if (status != eslOK)
            {
              esl_randomness_Destroy(r);   /* don't leak the generator on the error path */
              return status;
            }
          sum += id;
        }
      sum /= (double) max_comparisons;
      esl_randomness_Destroy(r);
    }

  *ret_id = sum;
  return eslOK;
}
int main(int argc, char **argv) { double mu = -50.0; double lambda = 2.5; double tau = 0.7; ESL_HISTOGRAM *h = esl_histogram_CreateFull(mu, 100., 0.1); ESL_RANDOMNESS *r = esl_randomness_Create(0); int n = 10000; double *data; int ndata; double emu, elam, etau; int i; double x; for (i = 0; i < n; i++) { x = esl_sxp_Sample(r, mu, lambda, tau); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); /* Plot the empirical (sampled) and expected survivals */ esl_histogram_PlotSurvival(stdout, h); esl_sxp_Plot(stdout, mu, lambda, tau, &esl_sxp_surv, h->xmin, h->xmax, 0.1); /* ML fit to complete data, and plot fitted survival curve */ esl_sxp_FitComplete(data, ndata, &emu, &elam, &etau); esl_sxp_Plot(stdout, emu, elam, etau, &esl_sxp_surv, h->xmin, h->xmax, 0.1); /* ML fit to binned data, plot fitted survival curve */ esl_sxp_FitCompleteBinned(h, &emu, &elam, &etau); esl_sxp_Plot(stdout, emu, elam, etau, &esl_sxp_surv, h->xmin, h->xmax, 0.1); esl_randomness_Destroy(r); esl_histogram_Destroy(h); return 0; }
/* stats_sample()
 * Writes an R input table to <fp> containing 10,000 random samples
 * each in columns labeled "gumbel", "frechet", "weibull". The three
 * columns are drawn with esl_gev_Sample() using alpha = 0.0, 0.2 and
 * -0.2 respectively, all with mu = -20 and lambda = 0.4, from a
 * generator seeded with 42.
 *
 * To process in R (remember that R uses 1/lambda for scale):
     library(ismev)
     library(evd)
     z=read.table("stats.7")
     x1 <- sort(z$gumbel,  decreasing=T)
     x2 <- sort(z$frechet, decreasing=T)
     x3 <- sort(z$weibull, decreasing=T)
     q1 <- qgumbel(ppoints(10000), -20., 1./0.4)
     q2 <- qgev(ppoints(10000), -20., 1./0.4, 0.2)
     q3 <- qgev(ppoints(10000), -20., 1./0.4, -0.2)
     xax<- seq(-40,40,by=0.1)
     a1 <- dgumbel(xax, -20, 1/0.4)
     a2 <- dgev(xax, -20, 1/0.4, 0.2)
     a3 <- dgev(xax, -20, 1/0.4, -0.2)
     qqplot(x1,q1); abline(0,1)
     qqplot(x2,q2); abline(0,1)
     qqplot(x3,q3); abline(0,1)
     plot(density(x1,bw=0.2)); lines(xax,a1)
     plot(density(x2,bw=0.2)); lines(xax,a2)
     plot(density(x3,bw=0.2)); lines(xax,a3)
 */
static void
stats_sample(FILE *fp)
{
  ESL_RANDOMNESS *rng     = esl_randomness_Create(42);
  double          mu      = -20.;
  double          lambda  = 0.4;
  int             nrows   = 10000;
  int             row     = 1;       /* rows are numbered from 1 in the table */
  double          gumbel, frechet, weibull;

  fprintf(fp, " gumbel \t frechet\t weibull\n");
  while (row <= nrows)
    {
      /* The sampling order within a row matters: all three share one generator. */
      gumbel  = esl_gev_Sample(rng, mu, lambda,  0.0);
      frechet = esl_gev_Sample(rng, mu, lambda,  0.2);
      weibull = esl_gev_Sample(rng, mu, lambda, -0.2);
      fprintf(fp, "%d\t%8.4f\t%8.4f\t%8.4f\n", row, gumbel, frechet, weibull);
      row++;
    }
  esl_randomness_Destroy(rng);
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); int M = 50; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_generation (rng, M, abc, 10); // test a bunch of seqs to try to make sure we exercise exact domain score recalculation utest_singlemulti(rng, M, abc, 10); fprintf(stderr, "# status = ok\n"); esl_alphabet_Destroy(abc); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
/* Example: draw 10,000 samples from a GEV with mu=-20, lambda=0.4,
 * alpha=0.1; report the tail probabilities (and expected counts) at
 * the observed extremes; then fit a GEV to the samples with
 * esl_gev_FitComplete() and print each estimate next to the parametric
 * value with its percent difference.
 *
 * Returns 0 on success, 1 if the sample array cannot be allocated.
 */
int
main(int argc, char **argv)
{
  double          est_mu, est_lambda, est_alpha;
  double          z;
  int             i;
  int             n      = 10000;    /* simulate 10,000 samples */
  double          mu     = -20.0;    /* with mu     = -20       */
  double          lambda = 0.4;      /* and lambda  = 0.4       */
  double          alpha  = 0.1;      /* and alpha   = 0.1       */
  double          min    =  9999.;
  double          max    = -9999.;
  double         *x      = malloc(sizeof(double) * n);
  ESL_RANDOMNESS *r      = NULL;

  /* Check the allocation before the sampling loop writes through it. */
  if (x == NULL)
    {
      fprintf(stderr, "failed to allocate %d samples\n", n);
      return 1;
    }
  r = esl_randomness_Create(0);

  for (i = 0; i < n; i++)            /* generate the 10,000 samples */
    {
      x[i] = esl_gev_Sample(r, mu, lambda, alpha);
      if (x[i] < min) min = x[i];
      if (x[i] > max) max = x[i];
    }

  z = esl_gev_surv(max, mu, lambda, alpha);   /* right tail p~1e-4 >= max */
  printf("max = %6.1f P(>max) = %g E=%6.3f\n", max, z, z*(double)n);
  z = esl_gev_cdf(min, mu, lambda, alpha);    /* left tail p~1e-4 < min */
  printf("min = %6.1f P(<=min) = %g E=%6.3f\n", min, z, z*(double)n);

  esl_gev_FitComplete(x, n, &est_mu, &est_lambda, &est_alpha);

  printf("Parametric mu = %6.1f. Estimated mu = %6.2f. Difference = %.1f%%.\n",
         mu, est_mu, 100. * fabs((est_mu - mu) / mu));
  printf("Parametric lambda = %6.2f. Estimated lambda = %6.2f. Difference = %.1f%%.\n",
         lambda, est_lambda, 100. * fabs((est_lambda - lambda) /lambda));
  printf("Parametric alpha = %6.4f. Estimated alpha = %6.4f. Difference = %.1f%%.\n",
         alpha, est_alpha, 100. * fabs((est_alpha - alpha) /alpha));

  free(x);
  esl_randomness_Destroy(r);
  return 0;
}
/* Function: esl_dst_CAverageId() * Synopsis: Calculate avg identity for multiple alignment * Incept: SRE, Fri May 18 15:02:38 2007 [Janelia] * * Purpose: Calculates the average pairwise fractional identity in * a multiple sequence alignment <as>, consisting of <N> * aligned character sequences of identical length. * * If an exhaustive calculation would require more than * <max_comparisons> pairwise comparisons, then instead of * looking at all pairs, calculate the average over a * stochastic sample of <max_comparisons> random pairs. * This allows the routine to work efficiently even on very * deep MSAs. * * Each fractional pairwise identity (range $[0..$ pid $..1]$ * is calculated using <esl_dst_CPairId()>. * * Returns: <eslOK> on success, and <*ret_id> contains the average * fractional identity. * * Throws: <eslEMEM> on allocation failure. * <eslEINVAL> if any of the aligned sequence pairs aren't * of the same length. * In either case, <*ret_id> is set to 0. */ int esl_dst_CAverageId(char **as, int N, int max_comparisons, double *ret_id) { int status; double id; double sum; int i,j,n; if (N <= 1) { *ret_id = 1.; return eslOK; } *ret_id = 0.; /* Is nseq small enough that we can average over all pairwise comparisons? */ if ((N * (N-1) / 2) <= max_comparisons) { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) { if ((status = esl_dst_CPairId(as[i], as[j], &id, NULL, NULL)) != eslOK) return status; sum += id; } id /= (double) (N * (N-1) / 2); } /* If nseq is large, calculate average over a stochastic sample. */ else { ESL_RANDOMNESS *r = esl_randomness_Create(0); for (n = 0; n < max_comparisons; n++) { do { i = esl_rnd_Roll(r, N); j = esl_rnd_Roll(r, N); } while (j == i); /* make sure j != i */ if ((status = esl_dst_CPairId(as[i], as[j], &id, NULL, NULL)) != eslOK) return status; sum += id; } id /= (double) max_comparisons; esl_randomness_Destroy(r); } *ret_id = id; return eslOK; }
/* Test driver: runs utest_random() and utest_choose(), and optionally
 * calls save_bitfile() when --bitfile is given.
 *
 * Options read here: -s (seed), -r (use a time-seeded RNG instead),
 * -b (nbins), -n (n), -v (verbose), --bitfile (output file name).
 * Note that utest_random() is handed the integer seed itself, not the
 * generator created here.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  char           *bitfile = esl_opt_GetString (go, "--bitfile");
  int             nbins   = esl_opt_GetInteger(go, "-b");
  int             n       = esl_opt_GetInteger(go, "-n");
  int             verbose = esl_opt_GetBoolean(go, "-v");
  int             seed    = esl_opt_GetInteger(go, "-s");
  ESL_RANDOMNESS *rng;

  rng = esl_opt_GetBoolean(go, "-r") ? esl_randomness_CreateTimeseeded()
                                     : esl_randomness_Create(seed);

  utest_random(seed, n, nbins, verbose);
  utest_choose(rng,  n, nbins, verbose);

  if (bitfile != NULL)
    save_bitfile(bitfile, rng, n);

  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
/* Example: simulate 100 points on the line y = a + b*x (a=-3, b=1,
 * x = -20, -19, ...) with y drawn by esl_rnd_Gaussian() using sigma=1
 * on every point, run esl_stats_LinearRegression() on them, and print
 * the estimates it returns.
 *
 * Exits via esl_fatal() if a data array cannot be allocated or if the
 * regression does not return <eslOK>.
 */
int
main(void)
{
  ESL_RANDOMNESS *r        = esl_randomness_Create(0);
  double          a        = -3.;
  double          b        =  1.;
  double          xori     = -20.;
  double          xstep    = 1.0;
  double          setsigma = 1.0;      /* sigma on all points */
  int             n        = 100;
  double         *x        = malloc(sizeof(double) * n);
  double         *y        = malloc(sizeof(double) * n);
  double         *sigma    = malloc(sizeof(double) * n);
  int             i;
  double          ae, be, siga, sigb, cov_ab, cc, Q;

  /* Check the allocations before the simulation loop writes through them. */
  if (x == NULL || y == NULL || sigma == NULL)
    esl_fatal("allocation failed");

  /* Simulate some linear data, with Gaussian noise added to y_i */
  for (i = 0; i < n; i++)
    {
      sigma[i] = setsigma;
      x[i]     = xori + i*xstep;
      y[i]     = esl_rnd_Gaussian(r, a + b*x[i], sigma[i]);
    }

  if (esl_stats_LinearRegression(x, y, sigma, n, &ae, &be, &siga, &sigb, &cov_ab, &cc, &Q) != eslOK)
    esl_fatal("linear regression failed");

  printf("estimated intercept a = %8.4f [true = %8.4f]\n", ae, a);
  printf("estimated slope b = %8.4f [true = %8.4f]\n", be, b);
  printf("estimated sigma on a = %8.4f\n", siga);
  printf("estimated sigma on b = %8.4f\n", sigb);
  printf("estimated cov(a,b) = %8.4f\n", cov_ab);
  printf("correlation coeff = %8.4f\n", cc);
  printf("P-value = %8.4f\n", Q);

  free(x);
  free(y);
  free(sigma);
  esl_randomness_Destroy(r);
  exit(0);
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; ESL_STOPWATCH *w = esl_stopwatch_Create(); struct cfg_s cfg; p7_Init(); /* Process command line options. */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) { printf("Failed to parse command line: %s\n", go->errbuf); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_GetBoolean(go, "-h") == TRUE) { p7_banner(stdout, argv[0], banner); esl_usage(stdout, argv[0], usage); puts("\nCommon options:"); esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1=docgroup, 2 = indentation; 80=textwidth*/ puts("\nChoice of score type :"); esl_opt_DisplayHelp(stdout, go, 2, 2, 80); puts("\nChoice of alignment mode :"); esl_opt_DisplayHelp(stdout, go, 3, 2, 80); puts("\nChoice of multi vs. unihit config :"); esl_opt_DisplayHelp(stdout, go, 4, 2, 80); puts("\nChoice of generic vs. vector DP implementation :"); esl_opt_DisplayHelp(stdout, go, 5, 2, 80); puts("\nOutput options (use only in serial mode, for single HMM input):"); esl_opt_DisplayHelp(stdout, go, 6, 2, 80); puts("\nControlling range of fitted tail masses :"); esl_opt_DisplayHelp(stdout, go, 7, 2, 80); puts("\nRecalibrating E-values, and replacing HMM's existing parameters :"); esl_opt_DisplayHelp(stdout, go, 8, 2, 80); puts("\nDebugging :"); esl_opt_DisplayHelp(stdout, go, 9, 2, 80); puts("\nExperiments :"); esl_opt_DisplayHelp(stdout, go, 10, 2, 80); exit(0); } if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); esl_usage(stdout, argv[0], usage); puts("\nwhere general options are:"); esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1=docgroup, 2 = indentation; 80=textwidth*/ printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } /* Validate combinations of score/config/implementation (4x3x2x2, score x mode x hit x DPimpl: 20 of 48 possible combos valid */ if 
(esl_opt_GetBoolean(go, "--vector") && ! esl_opt_GetBoolean(go, "--local")) p7_Fail("SIMD vector implementations only work for local alignment."); /* -16/48 */ if (esl_opt_GetBoolean(go, "--msv") && ! esl_opt_GetBoolean(go, "--local")) p7_Fail("MSV scoring is local by definition: use --local."); /* -4/48 */ if (esl_opt_GetBoolean(go, "--vit") && ! esl_opt_GetBoolean(go, "--local")) p7_Fail("no p7_GViterbiDual for new dual-mode profile implemented yet"); /* -4/48 */ /* Initialize configuration shared across all kinds of masters * and workers in this .c file. */ cfg.hmmfile = esl_opt_GetArg(go, 1); cfg.r = esl_randomness_Create(esl_opt_GetInteger(go, "--seed")); // cfg.abc = esl_alphabet_Create(eslAMINO); cfg.my_rank = 0; /* MPI init will change this soon, if --mpi was set */ cfg.nproc = 0; /* MPI init will change this soon, if --mpi was set */ cfg.do_mpi = FALSE; /* --mpi will change this soon (below) if necessary */ cfg.do_stall = esl_opt_GetBoolean(go, "--stall"); cfg.N = esl_opt_GetInteger(go, "-N"); cfg.L = esl_opt_GetInteger(go, "-L"); cfg.hfp = NULL; cfg.ofp = NULL; cfg.survfp = NULL; cfg.efp = NULL; cfg.ffp = NULL; cfg.xfp = NULL; cfg.alfp = NULL; cfg.bg = NULL; /* This is our stall point, if we need to wait until we get a * debugger attached to this process for debugging (especially * useful for MPI): */ while (cfg.do_stall); /* Start timing. */ esl_stopwatch_Start(w); /* Main body: * Handed off to serial version or MPI masters and workers as appropriate. */ #ifdef HAVE_MPI if (esl_opt_GetBoolean(go, "--mpi")) { /* Initialize MPI, figure out who we are, and whether we're running * this show (proc 0) or working in it (procs >0). 
*/ cfg.do_mpi = TRUE; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &(cfg.my_rank)); MPI_Comm_size(MPI_COMM_WORLD, &(cfg.nproc)); if (cfg.my_rank == 0 && cfg.nproc < 2) p7_Fail("Need at least 2 MPI processes to run --mpi mode."); if (cfg.my_rank > 0) mpi_worker(go, &cfg); else mpi_master(go, &cfg); esl_stopwatch_Stop(w); esl_stopwatch_MPIReduce(w, 0, MPI_COMM_WORLD); MPI_Finalize(); /* both workers and masters reach this line */ } else #endif /*HAVE_MPI*/ { /* No MPI? Then we're just the serial master. */ serial_master(go, &cfg); esl_stopwatch_Stop(w); } /* Stop timing. */ if (cfg.my_rank == 0) esl_stopwatch_Display(stdout, w, "# CPU time: "); /* Clean up and exit. */ if (cfg.my_rank == 0) { if (cfg.hfp != NULL) p7_hmmfile_Close(cfg.hfp); if (esl_opt_IsOn(go, "-o")) fclose(cfg.ofp); if (cfg.survfp != NULL) fclose(cfg.survfp); if (cfg.efp != NULL) fclose(cfg.efp); if (cfg.ffp != NULL) fclose(cfg.ffp); if (cfg.xfp != NULL) fclose(cfg.xfp); if (cfg.alfp != NULL) fclose(cfg.alfp); } p7_bg_Destroy(cfg.bg); esl_alphabet_Destroy(cfg.abc); esl_randomness_Destroy(cfg.r); esl_getopts_Destroy(go); esl_stopwatch_Destroy(w); return eslOK; }
/* main()
 * Benchmark driver for the word-based "Viterbi COPS" implementation.
 *
 * What the visible code does:
 *   - reads one HMM from the file named by argument 1 and configures two
 *     profiles (gm1, gm2) in p7_UNILOCAL mode for length L (-L);
 *   - tries to open argument 2 as a digital FASTA sequence file; if that
 *     succeeds, copies each sequence read into <seqsdb>, otherwise fills
 *     <seqsdb> with sequences of length L generated by esl_rsq_xfIID();
 *   - runs p7_ViterbiCOPSw_run() on consecutive batches of SSE16_NVALS
 *     sequences, NROUNDS (-R) times over, timing the loop with ftime();
 *   - with -c, recomputes each score with p7_ViterbiFilter(), adds 1.0,
 *     and prints every sequence whose two scores differ by more than 0.0001.
 *
 * NOTE(review): this function contains `//` comments whose original line
 * breaks are not preserved in this copy of the file, so where each such
 * comment ends (and therefore which of the following statements are live)
 * cannot be determined from the text here; confirm against the original
 * source before changing any code near a `//`.
 * NOTE(review): the realloc() result is assigned straight back to <seqsdb>,
 * no malloc/calloc/realloc result is checked, and nothing allocated here is
 * released before `return 0` -- presumably acceptable for a one-shot
 * benchmark, but worth confirming.
 */
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS*r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET*abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm1, *gm2; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N") / SSE16_NVALS; int MaxPart = esl_opt_GetInteger(go, "-M"); int NROUNDS = esl_opt_GetInteger(go, "-R"); int check = esl_opt_GetBoolean(go, "-c"); __m128 resdata[10]; int i, j; float *sc1 = (float*) resdata; ESL_SQFILE *sqfp = NULL; DATA_COPS16 *dcops; struct timeb tbstart, tbend; int sumlengths = 0; float* results = NULL; srand(time(NULL)); if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm1 = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm1, L, p7_UNILOCAL); p7_ProfileConfig(hmm, bg, gm2, L, p7_UNILOCAL); dcops = p7_ViterbiCOPSw_Create(gm1); p7_ViterbiCOPSW_Setup(dcops, L+100, MaxPart); // use max L dcops->L = L; int dbsize = SSE16_NVALS*N; SEQ **seqsdb= calloc(dbsize+1, sizeof(SEQ*)); int equallength = 1; if (esl_sqfile_OpenDigital(abc, seqfile, eslSQFILE_FASTA, NULL, &sqfp) == eslOK) { // Use Sequence file ESL_SQ* sq = esl_sq_CreateDigital(abc); int maxseqs, len=0; if (esl_opt_IsDefault(go, "-N")) // N not specified in cmdline maxseqs = INT_MAX; // no limit else maxseqs = SSE16_NVALS*N; // use cmdline limit for (j = 0; j < maxseqs && esl_sqio_Read(sqfp, sq) == eslOK; j++) { if (equallength && sq->n != len && j > 0) equallength = 0; len = sq->n; if (j > dbsize) { seqsdb = realloc(seqsdb, 2*(dbsize+1)*sizeof(SEQ*)); dbsize *= 2; } ESL_DSQ* dsq = 
sq->dsq; seqsdb[j] = malloc(sizeof(SEQ)); seqsdb[j]->length = len; seqsdb[j]->seq = malloc((len+4)*sizeof(ESL_DSQ)); memcpy(seqsdb[j]->seq, dsq, len+2); sumlengths += len; esl_sq_Reuse(sq); } N = j/SSE16_NVALS; } else // Not found database. Generate random sequences for (i = 0; i < N; i++) for (j = 0; j < SSE16_NVALS; j++) { int len = L; // - rand()%1000; seqsdb[i*SSE16_NVALS+j] = malloc(sizeof(SEQ)); seqsdb[i*SSE16_NVALS+j]->seq = malloc(len+4); seqsdb[i*SSE16_NVALS+j]->length = len; esl_rsq_xfIID(r, bg->f, abc->K, len, seqsdb[i*SSE16_NVALS+j]->seq); sumlengths += len; } printf("Viterbi COPS Word with %d threads, model %s. ModelLen: %d, #Segms: %d, SeqL.: %d, #seqs: %d, Partition: %d, #parts: %d\n", NTHREADS, hmmfile, gm1->M, (int) ceil(gm1->M/SSE16_NVALS), L, SSE16_NVALS*N*NROUNDS, dcops->partition, dcops->Npartitions); /* // No. of partitions computed without full parallelism ( == no. of threads active while some are idle) int Niters_part = dcops->Npartitions % NTHREADS; // No. of Model lines that could be computed but are wasted by idle threads waiting on the end int Nwasted_threads = dcops->partition * ((NTHREADS-Niters_part) % NTHREADS); // No. of lines in the last partition that go beyond M. 
It's wasted comp time by a single thread int Nwasted_leftover= (dcops->partition - gm1->M % dcops->partition) % dcops->partition; // Total number of wasted lines int wastedcomp = Nwasted_threads + Nwasted_leftover; // Total number of lines computed and waited for int totalcomp = wastedcomp + gm1->M; // same as: roundtop(gm1->M, dcops->partition * NTHREADS); printf("Total Comp Lines: %d | Wasted Comp Lines: %d\n", totalcomp, wastedcomp); */ if (check) results = (float*) alloc_m128_aligned64((N+1)*2); ftime(&tbstart); if (!equallength) { // Sort sequences by length qsort(seqsdb, N*SSE16_NVALS, sizeof(SEQ*), compare_seqs); } for (j = 0; j < NROUNDS; j++) for (i = 0; i < N; i++) { // if (i % 1000 == 0) printf("Seq %d\n", i); p7_ViterbiCOPSw_run(dcops, seqsdb+i*SSE16_NVALS, sc1); if (check) memcpy(results+i*SSE16_NVALS, sc1, 32); // 32 bytes indeed! SSE16_NVALS floats } ftime(&tbend); double secs = TIMEDIFF(tbstart,tbend); w->elapsed = w->user = secs; esl_stopwatch_Display(stdout, w, "# Opt CPU time: "); double compmillioncells = NROUNDS * (double) sumlengths * (double) hmm->M * 1e-6; printf("# %.0fM cells in %.1f Mc/s\n", compmillioncells, compmillioncells / secs); if (check) { P7_OPROFILE *om = p7_oprofile_Create(hmm->M, gm1->abc); p7_oprofile_Convert(gm1, om); P7_OMX *ox = p7_omx_Create(hmm->M, 0, 0); printf("Compare results against base version\n"); for (i = 0; i < N; i++) { int maxll = 0; float sc2; for (j = 0; j < SSE16_NVALS; j++) if (maxll < seqsdb[i*SSE16_NVALS+j]->length) maxll = seqsdb[i*SSE16_NVALS+j]->length; p7_oprofile_ReconfigRestLength(om, maxll); // p7_ReconfigLength(gm2, maxll); // emulate the lock-step inter-sequence reconfigs for (j = 0; j < SSE16_NVALS; j++) { // p7_ReconfigLength(gm2, seqsdb[i*SSE16_NVALS+j]->length); // p7_Viterbi_unilocal(seqsdb[i*SSE16_NVALS+j]->seq, seqsdb[i*SSE16_NVALS+j]->length, gm2, &sc3); // p7_Viterbi_unilocal_word(seqsdb[i*SSE16_NVALS+j]->seq, seqsdb[i*SSE16_NVALS+j]->length, gm2, &sc2); // 
p7_oprofile_ReconfigLength(om, seqsdb[i*SSE16_NVALS+j]->length); p7_ViterbiFilter(seqsdb[i*SSE16_NVALS+j]->seq, seqsdb[i*SSE16_NVALS+j]->length, om, ox, &sc2); sc2 += 1.0; // -2.0nat optimization, Local to Unilocal mode if (fabs(results[i*SSE16_NVALS+j] - sc2) > 0.0001) { printf("Seq %d Len %4d: %f - %f\tdiff: %f\n", i*SSE16_NVALS+j, seqsdb[i*SSE16_NVALS+j]->length, results[i*SSE16_NVALS+j], sc2, fabs(results[i*SSE16_NVALS+j] - sc2)); } } } } return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; P7_ANCHORS *anch = p7_anchors_Create(); P7_ANCHORHASH *ah = p7_anchorhash_Create(); P7_ENVELOPES *env = p7_envelopes_Create(); P7_REFMX *rxf = NULL; P7_REFMX *rxd = NULL; P7_REFMX *afu = NULL; P7_REFMX *afd = NULL; P7_REFMX *apu = NULL; P7_REFMX *apd = NULL; P7_TRACE *tr = NULL; float *wrk = NULL; P7_MPAS_PARAMS prm; P7_MPAS_STATS stats; float fsc, vsc, asc, asc_b; int status; /* Read in one HMM */ if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); /* Open sequence file */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); /* Read one sequence */ status = esl_sqio_Read(sqfp, sq); if (status == eslEFORMAT) p7_Fail("Parse failed (sequence file %s)\n%s\n", sqfp->filename, sqfp->get_error(sqfp)); else if (status != eslOK) p7_Fail("Unexpected error %d reading sequence file %s", status, sqfp->filename); esl_sqfile_Close(sqfp); /* Configure a profile from the HMM */ bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); p7_profile_Config(gm, hmm, bg); /* Set the profile and null model's target length models */ p7_bg_SetLength (bg, sq->n); p7_profile_SetLength(gm, sq->n); /* 
Allocate DP matrices and tracebacks */ rxf = p7_refmx_Create(gm->M, sq->n); rxd = p7_refmx_Create(gm->M, sq->n); tr = p7_trace_Create(); afu = p7_refmx_Create(gm->M, sq->n); afd = p7_refmx_Create(gm->M, sq->n); /* First pass analysis */ p7_ReferenceViterbi (sq->dsq, sq->n, gm, rxf, tr, &vsc); p7_ReferenceForward (sq->dsq, sq->n, gm, rxf, &fsc); p7_ReferenceBackward(sq->dsq, sq->n, gm, rxd, NULL); p7_ReferenceDecoding(sq->dsq, sq->n, gm, rxf, rxd, rxd); /* Customize MPAS parameters if you want; these are the defaults. */ prm.max_iterations = 1000; prm.loss_threshold = 0.001; prm.nmax_sampling = FALSE; prm.be_verbose = FALSE; /* MPAS algorithm gets us an anchor set */ p7_reference_Anchors(rng, sq->dsq, sq->n, gm, rxf, rxd, tr, &wrk, ah, afu, afd, anch, &asc, &prm, &stats); //printf("# ASC Forward UP:\n"); p7_refmx_Dump(stdout, afu); //printf("# ASC Forward DOWN:\n"); p7_refmx_Dump(stdout, afd); /* We no longer need rxf and rxd. * Use their space for apu/apd pair, which will briefly * hold ASC Backward matrices, then get used for ASC Decoding. */ apu = rxf; p7_refmx_Reuse(apu); apd = rxd; p7_refmx_Reuse(apd); p7_ReferenceASCBackward(sq->dsq, sq->n, gm, anch->a, anch->D, apu, apd, &asc_b); //printf("# Backward score (raw, nats): %.2f\n", asc_b); //printf("# ASC Backward UP:\n"); p7_refmx_Dump(stdout, apu); //printf("# ASC Backward DOWN:\n"); p7_refmx_Dump(stdout, apd); /* ASC Decoding takes afu/afd and abu/abd as input; * overwrites abu/abd with decoding matrices */ p7_ReferenceASCDecoding(sq->dsq, sq->n, gm, anch->a, anch->D, afu, afd, apu, apd, apu, apd); //printf("# ASC Decoding UP matrix:\n"); p7_refmx_Dump(stdout, apu); //printf("# ASC Decoding DOWN:\n"); p7_refmx_Dump(stdout, apu); /* Envelope calculation needs to get four matrices: * ASC Decoding pair, apu/apd, and it will leave these constant; * ASC Forward pair, afu/afd, and it will overwrite these. 
*/ p7_reference_Envelopes(sq->dsq, sq->n, gm, anch->a, anch->D, apu, apd, afu, afd, env); p7_envelopes_Dump(stdout, env); p7_envelopes_Destroy(env); p7_anchorhash_Destroy(ah); p7_anchors_Destroy(anch); if (wrk) free(wrk); p7_trace_Destroy(tr); p7_refmx_Destroy(afd); p7_refmx_Destroy(afu); p7_refmx_Destroy(rxd); p7_refmx_Destroy(rxf); esl_sq_Destroy(sq); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_HISTOGRAM *h; ESL_RANDOMNESS *r; double mu = -5.0; double lambda = 2.0; double tau = 0.7; int n = 10000; double binwidth = 0.1; double elambda, etau; int i; double x; double *data; int ndata; int opti; int be_verbose = FALSE; char *plotfile = NULL; FILE *pfp = stdout; int plot_pdf = FALSE; int plot_logpdf = FALSE; int plot_cdf = FALSE; int plot_logcdf = FALSE; int plot_surv = FALSE; int plot_logsurv = FALSE; int xmin_set = FALSE; double xmin; int xmax_set = FALSE; double xmax; int xstep_set = FALSE; double xstep; for (opti = 1; opti < argc && *(argv[opti]) == '-'; opti++) { if (strcmp(argv[opti], "-m") == 0) mu = atof(argv[++opti]); else if (strcmp(argv[opti], "-l") == 0) lambda = atof(argv[++opti]); else if (strcmp(argv[opti], "-n") == 0) n = atoi(argv[++opti]); else if (strcmp(argv[opti], "-o") == 0) plotfile = argv[++opti]; else if (strcmp(argv[opti], "-t") == 0) tau = atof(argv[++opti]); else if (strcmp(argv[opti], "-v") == 0) be_verbose = TRUE; else if (strcmp(argv[opti], "-w") == 0) binwidth = atof(argv[++opti]); else if (strcmp(argv[opti], "-C") == 0) plot_cdf = TRUE; else if (strcmp(argv[opti], "-LC") == 0) plot_logcdf = TRUE; else if (strcmp(argv[opti], "-P") == 0) plot_pdf = TRUE; else if (strcmp(argv[opti], "-LP") == 0) plot_logpdf = TRUE; else if (strcmp(argv[opti], "-S") == 0) plot_surv = TRUE; else if (strcmp(argv[opti], "-LS") == 0) plot_logsurv = TRUE; else if (strcmp(argv[opti], "-XL") == 0) { xmin_set = TRUE; xmin = atof(argv[++opti]); } else if (strcmp(argv[opti], "-XH") == 0) { xmax_set = TRUE; xmax = atof(argv[++opti]); } else if (strcmp(argv[opti], "-XS") == 0) { xstep_set = TRUE; xstep = atof(argv[++opti]); } else ESL_EXCEPTION(eslEINVAL, "bad option"); } if (be_verbose) printf("Parametric: mu = %f lambda = %f tau = %f\n", mu, lambda, tau); r = esl_randomness_Create(0); h = esl_histogram_CreateFull(mu, 100., binwidth); if (plotfile != NULL) { if ((pfp = fopen(plotfile, "w")) == NULL) 
ESL_EXCEPTION(eslFAIL, "Failed to open plotfile"); } if (! xmin_set) xmin = mu; if (! xmax_set) xmax = mu+40*(1./lambda); if (! xstep_set) xstep = 0.1; for (i = 0; i < n; i++) { x = esl_gam_Sample(r, mu, lambda, tau); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); esl_gam_FitComplete(data, ndata, mu, &elambda, &etau); if (be_verbose) printf("Complete data fit: mu = %f lambda = %f tau = %f\n", mu, elambda, etau); if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted tau > 10%\n"); if (plot_pdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_pdf, xmin, xmax, xstep); if (plot_logpdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logpdf, xmin, xmax, xstep); if (plot_cdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_cdf, xmin, xmax, xstep); if (plot_logcdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logcdf, xmin, xmax, xstep); if (plot_surv) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_surv, xmin, xmax, xstep); if (plot_logsurv) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logsurv, xmin, xmax, xstep); if (plotfile != NULL) fclose(pfp); esl_randomness_Destroy(r); esl_histogram_Destroy(h); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; ESL_RANDOMNESS *r = NULL; char **as = NULL; /* aligned character seqs (random, iid) */ int N,L; /* # of seqs, and their aligned lengths */ int seed; int i,j; int status; double p[4]; /* ACGT probabilities */ #ifdef eslAUGMENT_ALPHABET ESL_DSQ **ax = NULL; /* digitized alignment */ ESL_ALPHABET *abc = NULL; #endif /* Process command line */ go = esl_getopts_Create(options); esl_opt_ProcessCmdline(go, argc, argv); esl_opt_VerifyConfig(go); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } L = esl_opt_GetInteger(go, "-L"); N = esl_opt_GetInteger(go, "-N"); seed = esl_opt_GetInteger(go, "--seed"); if (esl_opt_ArgNumber(go) != 0) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } esl_getopts_Destroy(go); /* Create a random DNA alignment; * force it to obey the conventions of the unit tests: * 0,1 are identical * 0,2 are completely dissimilar */ r = esl_randomness_Create(seed); for (i = 0; i < 4; i++) p[i] = 0.25; ESL_ALLOC(as, sizeof(char *) * N); for (i = 0; i < N; i++) ESL_ALLOC(as[i], sizeof(char) * (L+1)); esl_rsq_IID(r, "ACGT", p, 4, L, as[0]); strcpy(as[1], as[0]); esl_rsq_IID(r, "ACGT", p, 4, L, as[2]); for (j = 0; j < L; j++) while (as[2][j] == as[0][j]) as[2][j] = "ACGT"[esl_rnd_Roll(r, 4)]; for (i = 3; i < N; i++) esl_rsq_IID(r, "ACGT", p, 4, L, as[i]); #ifdef eslAUGMENT_ALPHABET abc = esl_alphabet_Create(eslDNA); ESL_ALLOC(ax, sizeof(ESL_DSQ *) * N); for (i = 0; i < N; i++) esl_abc_CreateDsq(abc, as[i], &(ax[i])); #endif /*eslAUGMENT_ALPHABET*/ /* Unit tests */ if (utest_CPairId(as, N) != eslOK) return eslFAIL; if (utest_CJukesCantor(4, as, N) != eslOK) return eslFAIL; #ifdef eslAUGMENT_ALPHABET if (utest_XPairId(abc, as, ax, N) != eslOK) return eslFAIL; if (utest_XJukesCantor(abc, as, ax, N) != eslOK) return eslFAIL; #endif 
/*eslAUGMENT_ALPHABET*/ #ifdef eslAUGMENT_DMATRIX if (utest_CPairIdMx(as, N) != eslOK) return eslFAIL; if (utest_CDiffMx(as, N) != eslOK) return eslFAIL; if (utest_CJukesCantorMx(4, as, N) != eslOK) return eslFAIL; #endif /* eslAUGMENT_DMATRIX*/ #if defined (eslAUGMENT_ALPHABET) && defined (eslAUGMENT_DMATRIX) if (utest_XPairIdMx(abc, as, ax, N) != eslOK) return eslFAIL; if (utest_XDiffMx(abc, as, ax, N) != eslOK) return eslFAIL; if (utest_XJukesCantorMx(abc, as, ax, N) != eslOK) return eslFAIL; #endif esl_randomness_Destroy(r); esl_Free2D((void **) as, N); #ifdef eslAUGMENT_ALPHABET esl_alphabet_Destroy(abc); esl_Free2D((void **) ax, N); #endif return eslOK; ERROR: return eslFAIL; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); double mu = esl_opt_GetReal (go, "-m"); double lambda = esl_opt_GetReal (go, "-l"); double tau = esl_opt_GetReal (go, "-t"); int n = esl_opt_GetInteger(go, "-n"); double binwidth = esl_opt_GetReal (go, "-w"); int plot_cdf = esl_opt_GetBoolean(go, "--cdf"); int plot_logcdf = esl_opt_GetBoolean(go, "--logcdf"); int plot_pdf = esl_opt_GetBoolean(go, "--pdf"); int plot_logpdf = esl_opt_GetBoolean(go, "--logpdf"); int plot_surv = esl_opt_GetBoolean(go, "--surv"); int plot_logsurv = esl_opt_GetBoolean(go, "--logsurv"); int be_verbose = esl_opt_GetBoolean(go, "-v"); char *plotfile = esl_opt_GetString (go, "-o"); ESL_HISTOGRAM *h = NULL; int xmin_set = esl_opt_IsOn(go, "--xL"); double xmin = xmin_set ? esl_opt_GetReal(go, "--xL") : mu; int xmax_set = esl_opt_IsOn(go, "--xH"); double xmax = xmax_set ? esl_opt_GetReal(go, "--xH") : mu+40*(1./lambda); int xstep_set = esl_opt_IsOn(go, "--xH"); double xstep = xstep_set ? 
esl_opt_GetReal(go, "--xS") : 0.1; FILE *pfp = stdout; double emu, elambda, etau; int i; double x; double *data; int ndata; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); if (be_verbose) printf("Parametric: mu = %f lambda = %f tau = %f\n", mu, lambda, tau); h = esl_histogram_CreateFull(mu, 100., binwidth); if (plotfile && (pfp = fopen(plotfile, "w")) == NULL) ESL_EXCEPTION(eslFAIL, "Failed to open plotfile"); for (i = 0; i < n; i++) { x = esl_wei_Sample(rng, mu, lambda, tau); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); esl_wei_FitComplete(data, ndata, &emu, &elambda, &etau); if (be_verbose) printf("Complete data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau); if (fabs( (emu-mu)/mu ) > 0.01) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted mu > 1%\n"); if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted tau > 10%\n"); esl_wei_FitCompleteBinned(h, &emu, &elambda, &etau); if (be_verbose) printf("Binned data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau); if (fabs( (emu-mu)/mu ) > 0.01) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted mu > 1%\n"); if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted lambda > 10%\n"); if (plot_pdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_pdf, xmin, xmax, xstep); if (plot_logpdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logpdf, xmin, xmax, xstep); if (plot_cdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_cdf, xmin, xmax, xstep); if (plot_logcdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logcdf, xmin, xmax, xstep); if (plot_surv) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_surv, xmin, xmax, xstep); if (plot_logsurv) esl_wei_Plot(pfp, 
mu, lambda, tau, &esl_wei_logsurv, xmin, xmax, xstep); if (plotfile) fclose(pfp); esl_histogram_Destroy(h); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); fprintf(stderr, "# status = ok\n"); return 0; }
/* Example/test driver for the hyperexponential distribution routines.
 *
 * Takes the parametric hyperexponential either from a parameter file (-i)
 * or from a built-in 3-component default, samples <n> values from it, and
 * fits a hyperexponential with the same number of components first to the
 * complete data and then to the binned histogram. After each fit it checks
 * that fitted mu is within 1%, that every fitted lambda lies within 50% of
 * some parametric lambda, and that the matching mixture coefficient is
 * within 2-fold. Optionally plots the parametric curves.
 *
 * Options taking a value: -i <paramfile> -n <n> -o <plotfile> -w <binwidth>
 *                         -XL <xmin> -XH <xmax> -XS <xstep>
 * Flags:                  -f -v -C -LC -P -LP -S -LS
 *
 * Returns 0 on success; every failure goes through esl_fatal().
 */
int
main(int argc, char **argv)
{
  ESL_HISTOGRAM  *h;
  ESL_RANDOMNESS *r;
  ESL_HYPEREXP   *hxp;          /* parametric distribution */
  ESL_HYPEREXP   *ehxp;         /* estimated distribution  */
  int     n            = 20000;
  double  binwidth     = 0.1;
  int     i;
  double  x;
  double *data;
  int     ndata;
  int     k, ek, mink;
  double  mindiff, diff;

  int     opti;
  int     be_verbose   = FALSE;
  char   *paramfile    = NULL;
  char   *plotfile     = NULL;
  FILE   *pfp          = stdout;
  int     plot_pdf     = FALSE;
  int     plot_logpdf  = FALSE;
  int     plot_cdf     = FALSE;
  int     plot_logcdf  = FALSE;
  int     plot_surv    = FALSE;
  int     plot_logsurv = FALSE;
  int     xmin_set     = FALSE;
  double  xmin;
  int     xmax_set     = FALSE;
  double  xmax;
  int     xstep_set    = FALSE;
  double  xstep;
  int     do_fixmix    = FALSE;
  int     status;

  for (opti = 1; opti < argc && *(argv[opti]) == '-'; opti++)
    {
      const char *opt       = argv[opti];
      int         needs_arg = (strcmp(opt, "-i")  == 0 || strcmp(opt, "-n")  == 0 ||
                               strcmp(opt, "-o")  == 0 || strcmp(opt, "-w")  == 0 ||
                               strcmp(opt, "-XL") == 0 || strcmp(opt, "-XH") == 0 ||
                               strcmp(opt, "-XS") == 0);

      /* Without this check a trailing value-taking option would hand
       * argv[argc] (a null pointer) to atof()/atoi().
       */
      if (needs_arg && opti + 1 >= argc)
        esl_fatal("Option %s requires an argument", opt);

      if      (strcmp(opt, "-f")  == 0) do_fixmix    = TRUE;
      else if (strcmp(opt, "-i")  == 0) paramfile    = argv[++opti];
      else if (strcmp(opt, "-n")  == 0) n            = atoi(argv[++opti]);
      else if (strcmp(opt, "-o")  == 0) plotfile     = argv[++opti];
      else if (strcmp(opt, "-v")  == 0) be_verbose   = TRUE;
      else if (strcmp(opt, "-w")  == 0) binwidth     = atof(argv[++opti]);
      else if (strcmp(opt, "-C")  == 0) plot_cdf     = TRUE;
      else if (strcmp(opt, "-LC") == 0) plot_logcdf  = TRUE;
      else if (strcmp(opt, "-P")  == 0) plot_pdf     = TRUE;
      else if (strcmp(opt, "-LP") == 0) plot_logpdf  = TRUE;
      else if (strcmp(opt, "-S")  == 0) plot_surv    = TRUE;
      else if (strcmp(opt, "-LS") == 0) plot_logsurv = TRUE;
      else if (strcmp(opt, "-XL") == 0) { xmin_set  = TRUE; xmin  = atof(argv[++opti]); }
      else if (strcmp(opt, "-XH") == 0) { xmax_set  = TRUE; xmax  = atof(argv[++opti]); }
      else if (strcmp(opt, "-XS") == 0) { xstep_set = TRUE; xstep = atof(argv[++opti]); }
      else esl_fatal("bad option");
    }

  if (paramfile != NULL)
    {
      status = esl_hyperexp_ReadFile(paramfile, &hxp);
      if      (status == eslENOTFOUND) esl_fatal("Param file %s not found", paramfile);
      else if (status == eslEFORMAT)   esl_fatal("Parse failed: param file %s invalid format", paramfile);
      else if (status != eslOK)        esl_fatal("Unusual failure opening param file %s", paramfile);
    }
  else
    {
      /* Built-in default: three components. */
      hxp = esl_hyperexp_Create(3);
      hxp->mu        = -2.0;
      hxp->q[0]      = 0.5;   hxp->q[1]      = 0.3;   hxp->q[2]      = 0.2;
      hxp->lambda[0] = 1.0;   hxp->lambda[1] = 0.3;   hxp->lambda[2] = 0.1;
    }
  if (do_fixmix) esl_hyperexp_FixedUniformMixture(hxp);   /* overrides q's above */

  if (be_verbose) esl_hyperexp_Dump(stdout, hxp);

  r = esl_randomness_Create(42);
  h = esl_histogram_CreateFull(hxp->mu, 100., binwidth);
  if (plotfile != NULL) {
    if ((pfp = fopen(plotfile, "w")) == NULL)
      esl_fatal("Failed to open plotfile");
  }

  /* Plot range defaults depend on the parametric distribution. */
  if (! xmin_set)  xmin  = hxp->mu;
  if (! xmax_set)  xmax  = hxp->mu+ 20*(1. / esl_vec_DMin(hxp->lambda, hxp->K));
  if (! xstep_set) xstep = 0.1;

  for (i = 0; i < n; i++)
    {
      x = esl_hxp_Sample(r, hxp);
      esl_histogram_Add(h, x);
    }
  esl_histogram_GetData(h, &data, &ndata);   /* get sorted data vector */

  ehxp = esl_hyperexp_Create(hxp->K);
  if (do_fixmix) esl_hyperexp_FixedUniformMixture(ehxp);

  /* Fit to the complete data. esl_fatal() takes a printf-style format,
   * so a literal percent sign is written "%%".
   */
  esl_hxp_FitGuess(data, ndata, ehxp);
  if ( esl_hxp_FitComplete(data, ndata, ehxp) != eslOK)
    esl_fatal("Failed to fit hyperexponential");

  if (be_verbose) esl_hyperexp_Dump(stdout, ehxp);

  if (fabs( (ehxp->mu-hxp->mu)/hxp->mu ) > 0.01)
    esl_fatal("Error in (complete) fitted mu > 1%%\n");
  for (ek = 0; ek < ehxp->K; ek++)
    {  /* try to match each estimated lambda up to a parametric lambda */
      mindiff = 1.0;
      mink    = -1;
      for (k = 0; k < hxp->K; k++)
        {
          diff = fabs( (ehxp->lambda[ek] - hxp->lambda[k]) / hxp->lambda[k]);
          if (diff < mindiff) { mindiff = diff; mink = k; }
        }
      /* mink stays -1 only when mindiff is still 1.0, which the next line rejects. */
      if (mindiff > 0.50)
        esl_fatal("Error in (complete) fitted lambda > 50%%\n");
      if (fabs( (ehxp->q[ek] - hxp->q[mink]) / hxp->q[mink]) > 1.0)
        esl_fatal("Error in (complete) fitted q > 2-fold\n");
    }

  /* Fit to the binned histogram, with the same acceptance checks. */
  esl_hxp_FitGuessBinned(h, ehxp);
  if ( esl_hxp_FitCompleteBinned(h, ehxp) != eslOK)
    esl_fatal("Failed to fit binned hyperexponential");
  if (be_verbose) esl_hyperexp_Dump(stdout, ehxp);

  if (fabs( (ehxp->mu-hxp->mu)/hxp->mu ) > 0.01)
    esl_fatal("Error in (binned) fitted mu > 1%%\n");
  for (ek = 0; ek < ehxp->K; ek++)
    {  /* try to match each estimated lambda up to a parametric lambda */
      mindiff = 1.0;
      mink    = -1;
      for (k = 0; k < hxp->K; k++)
        {
          diff = fabs( (ehxp->lambda[ek] - hxp->lambda[k]) / hxp->lambda[k]);
          if (diff < mindiff) { mindiff = diff; mink = k; }
        }
      if (mindiff > 0.50)
        esl_fatal("Error in (binned) fitted lambda > 50%%\n");
      if (fabs( (ehxp->q[ek] - hxp->q[mink]) / hxp->q[mink]) > 1.0)
        esl_fatal("Error in (binned) fitted q > 2-fold\n");
    }

  if (plot_pdf)     esl_hxp_Plot(pfp, hxp, &esl_hxp_pdf,     xmin, xmax, xstep);
  if (plot_logpdf)  esl_hxp_Plot(pfp, hxp, &esl_hxp_logpdf,  xmin, xmax, xstep);
  if (plot_cdf)     esl_hxp_Plot(pfp, hxp, &esl_hxp_cdf,     xmin, xmax, xstep);
  if (plot_logcdf)  esl_hxp_Plot(pfp, hxp, &esl_hxp_logcdf,  xmin, xmax, xstep);
  if (plot_surv)    esl_hxp_Plot(pfp, hxp, &esl_hxp_surv,    xmin, xmax, xstep);
  if (plot_logsurv) esl_hxp_Plot(pfp, hxp, &esl_hxp_logsurv, xmin, xmax, xstep);

  if (plotfile != NULL) fclose(pfp);
  esl_histogram_Destroy(h);
  esl_hyperexp_Destroy(hxp);
  esl_hyperexp_Destroy(ehxp);
  esl_randomness_Destroy(r);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; ESL_HMM *hmm = NULL; ESL_DSQ *dsq = NULL; int *path = NULL; ESL_HMX *fwd = NULL; ESL_HMX *bck = NULL; ESL_HMX *pp = NULL; int be_verbose = esl_opt_GetBoolean(go, "-v"); float fsc, bsc; int L; int i; float fsum, bsum; make_occasionally_dishonest_casino(&hmm, &abc); esl_hmm_Emit(r, hmm, &dsq, &path, &L); fwd = esl_hmx_Create(L, hmm->M); bck = esl_hmx_Create(L, hmm->M); pp = esl_hmx_Create(L, hmm->M); esl_hmm_Forward (dsq, L, hmm, fwd, &fsc); esl_hmm_Backward(dsq, L, hmm, bck, &bsc); esl_hmm_PosteriorDecoding(dsq, L, hmm, fwd, bck, pp); fsum = 0.0; bsum = bsc; fsum += fwd->sc[0]; if (be_verbose) printf("%4d %c %s %8.3f %8.3f\n", 0, '-', "--", fwd->sc[0], bck->sc[0]); bsum -= bck->sc[0]; for (i = 1; i <= L; i++) { fsum += fwd->sc[i]; if (be_verbose) printf("%4d %c %s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n", i, abc->sym[dsq[i]], path[i] == 0 ? "F " : " L", fwd->sc[i], bck->sc[i], fsum, bsum, fsum+bsum, pp->dp[i][0], pp->dp[i][1]); bsum -= fwd->sc[i]; } if (be_verbose) { printf("%4d %c %s %8.3f %8.3f\n", 0, '-', "--", fwd->sc[L+1], bck->sc[L+1]); printf("Forward score = %f\n", fsc); printf("Backward score = %f\n", bsc); } free(path); free(dsq); esl_hmx_Destroy(pp); esl_hmx_Destroy(bck); esl_hmx_Destroy(fwd); esl_alphabet_Destroy(abc); esl_hmm_Destroy(hmm); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { FILE *fp; ESL_RANDOMNESS *r; /* source of random numbers */ ESL_MIXGEV *mg; /* mixture GEV to sample from */ ESL_MIXGEV *emg; /* estimated mixture GEV */ double *x; /* sampled dataset */ int n = 100000; /* number of samples */ int i; int k; double nll; double min, max; r = esl_randomness_Create(42); mg = esl_mixgev_Create(2); mg->q[0] = 0.85; mg->q[1] = 0.15; mg->mu[0] = -2.72; mg->mu[1] = -2.0; mg->lambda[0] = 2.5; mg->lambda[1] = 1.0; mg->alpha[0] = 0.; mg->alpha[1] = 0.09; nll = 0.; min = 99999; max = -99999; x = malloc(sizeof(double) * n); for (i = 0; i < n; i++) { x[i] = esl_mixgev_Sample(r, mg); nll -= esl_mixgev_logpdf(x[i], mg); if (x[i] > max) max = x[i]; if (x[i] < min) min = x[i]; } printf("NLL of known mixGEV: %g\n", nll); /* Dump the raw data samples to an R file. */ fp = fopen("data.out", "w"); fprintf(fp, " val\n"); for (i = 0; i < n; i++) fprintf(fp, "%d %f\n", i+1, x[i]); fclose(fp); emg = esl_mixgev_Create(2); esl_mixgev_FitGuess(r, x, n, emg); /* esl_mixgev_Copy(mg, emg); */ esl_mixgev_ForceGumbel(emg, 0); esl_mixgev_FitComplete(x, n, emg); printf("Component q mu lambda alpha\n"); for (k=0; k < 2; k++) printf("%d\t%7.4f\t%7.2f\t%7.4f\t%7.4f\n", k, emg->q[k], emg->mu[k], emg->lambda[k], emg->alpha[k]); nll = 0.; for (i = 0; i < n; i++) nll -= esl_mixgev_logpdf(x[i], emg); printf("NLL of fitted mixGEV: %g\n", nll); /* Dump some R commands for showing these distributions */ printf("library(ismev)\n"); printf("library(evd)\n"); printf("d <- read.table(\"data.out\")$val\n"); printf("plot(density(d,bw=0.2), log=\"y\")\n"); printf("min <- %f\n", min); printf("max <- %f\n", max); printf("xax <- seq(min-2, max+5, by=0.1)\n"); printf("cc <- xax - xax\n"); printf("zc <- xax - xax\n"); for (k = 0; k < mg->K; k++) { printf("c%d <- %f * dgev(xax, %f, %f, %f)\n", k, mg->q[k], mg->mu[k], 1./mg->lambda[k], mg->alpha[k]); printf("cc <- cc + c%d\n", k); printf("lines(xax, c%d, col=\"blue\")\n", k); } for (k = 0; k < emg->K; k++) { 
printf("z%d <- %f * dgev(xax, %f, %f, %f)\n", k, emg->q[k], emg->mu[k], 1./emg->lambda[k], emg->alpha[k]); printf("zc <- zc + z%d\n", k); printf("lines(xax, z%d, col=\"blue\")\n", k); } printf("lines(xax, cc, col=\"green\")\n"); printf("lines(xax, zc, col=\"red\")\n"); esl_mixgev_Destroy(mg); esl_mixgev_Destroy(emg); esl_randomness_Destroy(r); free(x); return 0; }
/* Benchmark construction driver.
 *
 * Command line: <basename> <alifile> <dbfile>.
 * Opens five output files named <basename>.msa, .fa, .pos, .neg and .tbl,
 * then reads the alignments in <alifile> one at a time. For each one it
 * removes fragments, separates a training alignment from a stack of test
 * domains and, when at least two test domains exist, synthesizes positive
 * test sequences, writes the training alignment and appends a row to the
 * table file. After the last alignment it synthesizes the negatives.
 *
 * Returns 0; all failures go through cmdline_failure() or esl_fatal().
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go        = NULL;   /* command line configuration       */
  struct cfg_s  cfg;                /* application configuration        */
  char         *basename  = NULL;   /* base of the output file names    */
  char         *alifile   = NULL;   /* alignment file name              */
  char         *dbfile    = NULL;   /* name of seq db file              */
  char          outfile[256];       /* name of an output file           */
  int           alifmt;             /* format code for alifile          */
  int           dbfmt;              /* format code for dbfile           */
  ESL_MSAFILE  *afp       = NULL;   /* open alignment file              */
  ESL_MSA      *origmsa   = NULL;   /* one multiple sequence alignment  */
  ESL_MSA      *msa       = NULL;   /* MSA after frags are removed      */
  ESL_MSA      *trainmsa  = NULL;   /* training set, aligned            */
  ESL_STACK    *teststack = NULL;   /* test set: stack of ESL_SQ ptrs   */
  int           status;             /* easel return code                */
  int           nfrags;             /* # of fragments removed           */
  int           ntestdom;           /* # of test domains                */
  int           ntest;              /* # of test sequences created      */
  int           nali;               /* number of alignments read        */
  double        avgid;

  /* Command line. */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)
    cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go) != eslOK)
    cmdline_failure(argv[0], "Error in app configuration: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h"))
    cmdline_help(argv[0], go);
  if (esl_opt_ArgNumber(go) != 3)
    cmdline_failure(argv[0], "Incorrect number of command line arguments\n");

  basename = esl_opt_GetArg(go, 1);
  alifile  = esl_opt_GetArg(go, 2);
  dbfile   = esl_opt_GetArg(go, 3);
  alifmt   = eslMSAFILE_STOCKHOLM;
  dbfmt    = eslSQFILE_FASTA;

  /* Shared configuration. The generator is time-seeded unless --seed was given. */
  if (esl_opt_IsDefault(go, "--seed"))
    cfg.r = esl_randomness_CreateTimeseeded();
  else
    cfg.r = esl_randomness_Create(esl_opt_GetInteger(go, "--seed"));
  cfg.abc       = NULL;             /* until we open the MSA file, below */
  cfg.fragfrac  = esl_opt_GetReal(go, "-F");
  cfg.idthresh1 = esl_opt_GetReal(go, "-1");
  cfg.idthresh2 = esl_opt_GetReal(go, "-2");
  cfg.test_lens = NULL;
  cfg.ntest     = 0;

  /* Output files. snprintf() returning >= the buffer size means the name
   * was truncated.
   */
  if (snprintf(outfile, sizeof outfile, "%s.msa", basename) >= (int) sizeof outfile)
    esl_fatal("Failed to construct output MSA file name");
  if ((cfg.out_msafp = fopen(outfile, "w")) == NULL)
    esl_fatal("Failed to open MSA output file %s\n", outfile);

  if (snprintf(outfile, sizeof outfile, "%s.fa", basename) >= (int) sizeof outfile)
    esl_fatal("Failed to construct output FASTA file name");
  if ((cfg.out_seqfp = fopen(outfile, "w")) == NULL)
    esl_fatal("Failed to open FASTA output file %s\n", outfile);

  if (snprintf(outfile, sizeof outfile, "%s.pos", basename) >= (int) sizeof outfile)
    esl_fatal("Failed to construct pos test set summary file name");
  if ((cfg.possummfp = fopen(outfile, "w")) == NULL)
    esl_fatal("Failed to open pos test set summary file %s\n", outfile);

  if (snprintf(outfile, sizeof outfile, "%s.neg", basename) >= (int) sizeof outfile)
    esl_fatal("Failed to construct neg test set summary file name");
  if ((cfg.negsummfp = fopen(outfile, "w")) == NULL)
    esl_fatal("Failed to open neg test set summary file %s\n", outfile);

  if (snprintf(outfile, sizeof outfile, "%s.tbl", basename) >= (int) sizeof outfile)
    esl_fatal("Failed to construct benchmark table file name");
  if ((cfg.tblfp = fopen(outfile, "w")) == NULL)
    esl_fatal("Failed to open benchmark table file %s\n", outfile);

  /* Open the alignment file. */
  status = esl_msafile_Open(alifile, alifmt, NULL, &afp);
  switch (status)
    {
    case eslOK:
      break;
    case eslENOTFOUND:
      esl_fatal("Alignment file %s doesn't exist or is not readable\n", alifile);
      break;
    case eslEFORMAT:
      esl_fatal("Couldn't determine format of alignment %s\n", alifile);
      break;
    default:
      esl_fatal("Alignment file open failed with error %d\n", status);
      break;
    }

  /* Alphabet: an explicit option wins; otherwise guess from the file. */
  if      (esl_opt_GetBoolean(go, "--amino")) cfg.abc = esl_alphabet_Create(eslAMINO);
  else if (esl_opt_GetBoolean(go, "--dna"))   cfg.abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--rna"))   cfg.abc = esl_alphabet_Create(eslRNA);
  else
    {
      int type;

      status = esl_msafile_GuessAlphabet(afp, &type);
      switch (status)
        {
        case eslOK:
          break;
        case eslEAMBIGUOUS:
          esl_fatal("Failed to guess the bio alphabet used in %s.\nUse --dna, --rna, or --amino option to specify it.", alifile);
          break;
        case eslEFORMAT:
          esl_fatal("Alignment file parse failed: %s\n", afp->errbuf);
          break;
        case eslENODATA:
          esl_fatal("Alignment file %s is empty\n", alifile);
          break;
        default:
          esl_fatal("Failed to read alignment file %s\n", alifile);
          break;
        }
      cfg.abc = esl_alphabet_Create(type);
    }
  esl_msafile_SetDigital(afp, cfg.abc);

  /* Residue frequencies in cfg.fq: esl_composition_SW34() for amino acid
   * alphabets, otherwise uniform over the K symbols.
   */
  if (cfg.abc->type == eslAMINO)
    esl_composition_SW34(cfg.fq);
  else
    esl_vec_DSet(cfg.fq, cfg.abc->K, 1.0 / (double) cfg.abc->K);

  /* Open and process the dbfile; make sure it's in the same alphabet */
  process_dbfile(&cfg, dbfile, dbfmt);

  /* One alignment per iteration. */
  nali = 0;
  while ((status = esl_msa_Read(afp, &origmsa)) == eslOK)
    {
      remove_fragments(&cfg, origmsa, &msa, &nfrags);
      separate_sets   (&cfg, msa, &trainmsa, &teststack);
      ntestdom = esl_stack_ObjectCount(teststack);

      /* Alignments yielding fewer than two test domains contribute nothing. */
      if (ntestdom >= 2)
        {
          esl_stack_Shuffle(cfg.r, teststack);
          synthesize_positives(go, &cfg, msa->name, teststack, &ntest);

          esl_msa_MinimGaps(trainmsa, NULL, NULL);
          esl_msa_Write(cfg.out_msafp, trainmsa, eslMSAFILE_STOCKHOLM);

          /* 10000 is max_comparisons, before sampling kicks in */
          esl_dst_XAverageId(cfg.abc, trainmsa->ax, trainmsa->nseq, 10000, &avgid);
          fprintf(cfg.tblfp, "%-20s %3.0f%% %6d %6d %6d %6d %6d %6d\n",
                  msa->name, 100.*avgid, (int) trainmsa->alen, msa->nseq,
                  nfrags, trainmsa->nseq, ntestdom, ntest);
          nali++;
        }

      esl_msa_Destroy(trainmsa);
      esl_msa_Destroy(origmsa);
      esl_msa_Destroy(msa);
    }

  /* Why did the read loop stop? Only a clean EOF with >= 1 usable alignment is OK. */
  switch (status)
    {
    case eslEFORMAT:
      esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nOffending line is:\n%s\n",
                afp->linenumber, afp->fname, afp->errbuf, afp->buf);
      break;
    case eslEOF:
      if (nali == 0) esl_fatal("No alignments found in file %s\n", alifile);
      break;
    default:
      esl_fatal("Alignment file read failed with error code %d\n", status);
      break;
    }

  if (nali > 0)
    synthesize_negatives(go, &cfg, esl_opt_GetInteger(go, "-N"));

  fclose(cfg.out_msafp);
  fclose(cfg.out_seqfp);
  fclose(cfg.possummfp);
  fclose(cfg.negsummfp);
  fclose(cfg.tblfp);
  esl_randomness_Destroy(cfg.r);
  esl_alphabet_Destroy(cfg.abc);
  esl_msafile_Close(afp);
  esl_getopts_Destroy(go);
  return 0;
}
/* Timing harness for p7_ViterbiStream().
 *
 * As written: opens the HMM named by argument 1, builds two unilocal
 * profiles (gm1, gm2) for a fixed L of 2000, creates and sets up a
 * DATA_STREAM, prints the partition and wasted-line statistics, then times
 * nested j/i loops (both bounded by N) that call p7_ViterbiStream() on
 * groups of 8 entries of seqsdb, and reports the elapsed time and Mc/s.
 *
 * NOTE(review): both sequence-loading branches are guarded by `if(0)`, so
 * seqsdb keeps the zero-filled entries calloc() returned and sumlengths
 * stays 0 when the timed loop runs -- confirm whether one branch is meant
 * to be enabled. The result-comparison section is likewise under `if(0)`,
 * and the memcpy that would fill <results> is commented out.
 * NOTE(review): several `//` comments here share a physical line with code;
 * the description above assumes each originally ended at its own line
 * break -- confirm against the upstream file.
 */
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS*r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET*abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm1, *gm2; int L = 2000; // esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); int MaxPart = esl_opt_GetInteger(go, "-M"); __m128 resdata[10]; float *sc1 = (float*) (resdata+0); // uses 1 __m128s ESL_DSQ *dsq = NULL; int i, j; ESL_SQFILE *sqfp = NULL; DATA_STREAM *dstream; struct timeb tbstart, tbend; int sumlengths = 0; if (p7_hmmfile_Open(hmmfile, NULL, &hfp)!= eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm1 = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm1, L, p7_UNILOCAL); p7_ProfileConfig(hmm, bg, gm2, L, p7_UNILOCAL); dstream = p7_ViterbiStream_Create(gm1); p7_ViterbiStream_Setup(dstream, L+100, MaxPart); // use max L dstream->L = L; // No. of partitions computed without full parallelism ( == no. of threads active while some are idle) int Niters_part = dstream->Npartitions % NTHREADS; // No. of Model lines that could be computed but are wasted by idle threads waiting on the end int Nwasted_threads = dstream->partition * ((NTHREADS-Niters_part) % NTHREADS); // No. of lines in the last partition that go beyond M. 
It's wasted comp time by a single thread int Nwasted_leftover= (dstream->partition - gm1->M % dstream->partition) % dstream->partition; // Total number of wasted lines int wastedcomp = Nwasted_threads + Nwasted_leftover; // Total number of lines computed and waited for int totalcomp = wastedcomp + gm1->M; // same as: roundtop(gm1->M, dstream->partition * NTHREADS); printf("Viterbi Stream Word with %d Threads, model %s: Modelsize %d, #Segms: %d, SeqL: %d, Nseqs %d, Part %d, #Parts %d\n", NTHREADS, hmmfile, gm1->M, (int) ceil(gm1->M/8.0), L, 8*N, dstream->partition, dstream->Npartitions); printf("Total Comp Lines: %d | Wasted Comp Lines: %d\n", totalcomp, wastedcomp); // for ViterbiFilter P7_OPROFILE *om = p7_oprofile_Create(hmm->M, gm1->abc); p7_oprofile_Convert(gm1, om); P7_OMX *ox = p7_omx_Create(hmm->M, 0, 0); dsq_cmp_t **seqsdb= calloc(8*N+64, sizeof(dsq_cmp_t*)); if(0) { ESL_SQ* sq = esl_sq_CreateDigital(abc); if (esl_sqfile_OpenDigital(abc, seqfile, eslSQFILE_FASTA, NULL, &sqfp) != eslOK) { p7_Fail("Failed to open sequence file\n"); return -1; } for (j = 0; j < 8*N; j++) { int res = esl_sqio_Read(sqfp, sq); if (res != eslOK) { printf("ATENCAO: faltam sequencias\n"); break; } int len = sq->n; dsq = sq->dsq; seqsdb[j] = malloc(sizeof(dsq_cmp_t)); seqsdb[j]->length = len; seqsdb[j]->seq = malloc((len+4)*sizeof(ESL_DSQ)); memcpy(seqsdb[j]->seq, dsq, len+2); sumlengths += len; esl_sq_Reuse(sq); } ftime(&tbstart); N = j/8; printf("N = %d\n", N); // Sort sequences by length qsort(seqsdb, N*8, sizeof(dsq_cmp_t*), compare_seqs); } else if(0) for (i = 0; i < N; i++) { for (j = 0; j < 8; j++) { int len = L - rand()%1000; seqsdb[i*8+j] = malloc(sizeof(dsq_cmp_t)); seqsdb[i*8+j]->seq = malloc(len+4); seqsdb[i*8+j]->length = len; esl_rsq_xfIID(r, bg->f, abc->K, len, seqsdb[i*8+j]->seq); sumlengths += len; } } // double sumerrors = 0; float* results = (float*) alloc_m128_aligned64(N*2+2); ftime(&tbstart); for (j = 0; j < N; j++) for (i = 0; i < N; i++) { // if (i % 10000 == 
0) printf("START %d\n", i); p7_ViterbiStream(dstream, seqsdb+i*8, sc1); // memcpy(results+i*8, sc1, 32); } ftime(&tbend); double secs = TIMEDIFF(tbstart,tbend); // printf("Qsort time: %6.3f | Viterbi time: %6.3f\n", TIMEDIFF(tbqsort,tbstart), secs); w->elapsed = w->user = secs; esl_stopwatch_Display(stdout, w, "# Opt CPU time: "); printf("# %.0fM cells in %.1f Mc/s\n", (sumlengths * (double) gm1->M) / 1e6, (sumlengths * (double) gm1->M * 1e-6) / secs); if(0) // compare results against base version for (i = 0; i < 1000 && i < N; i++) { int maxll = 0; float sc2; for (j = 0; j < 8; j++) if (maxll < seqsdb[i*8+j]->length) maxll = seqsdb[i*8+j]->length; // for (j = 0; j < 8; j++) printf("%d ", seqsdb[i*8+j]->length); printf("\n"); // if (i % 10 == 0) printf("i %d\n", i); p7_oprofile_ReconfigRestLength(om, maxll); p7_ReconfigLength(gm2, maxll); // do the Reconfig here to fully emulate VitStream for (j = 0; j < 8; j++) { // p7_ReconfigLength(gm2, seqsdb[i*8+j]->length); // p7_Viterbi_unilocal(seqsdb[i*8+j]->seq, seqsdb[i*8+j]->length, gm2, &sc3); // p7_Viterbi_unilocal_word(seqsdb[i*8+j]->seq, seqsdb[i*8+j]->length, gm2, &sc2); // p7_oprofile_ReconfigLength(om, seqsdb[i*8+j]->length); p7_ViterbiFilter(seqsdb[i*8+j]->seq, seqsdb[i*8+j]->length, om, ox, &sc2); //sumerrors += fabs(sc1[j]- sc2); if (fabs(results[i*8+j] - sc2) > 0.00001) { printf("%3d-%d L %4d: VS %f d %f\t| Base SerI %f\n", i, j, seqsdb[i*8+j]->length, results[i*8+j], fabs(results[i*8+j] - sc2), sc2); getc(stdin); } } } return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); double mu = esl_opt_GetReal(go, "-m"); double lambda = esl_opt_GetReal(go, "-l"); double tau = esl_opt_GetReal(go, "-t"); ESL_RANDOMNESS *r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_HISTOGRAM *h = esl_histogram_CreateFull(mu, 100., esl_opt_GetReal(go, "-w"));; int n = esl_opt_GetInteger(go, "-n"); int be_verbose = esl_opt_GetBoolean(go, "-v"); char *plotfile = esl_opt_GetString(go, "-o"); FILE *pfp = stdout; int plot_pdf = esl_opt_GetBoolean(go, "--P"); int plot_logpdf = esl_opt_GetBoolean(go, "--LP"); int plot_cdf = esl_opt_GetBoolean(go, "--C"); int plot_logcdf = esl_opt_GetBoolean(go, "--LC"); int plot_surv = esl_opt_GetBoolean(go, "--S"); int plot_logsurv = esl_opt_GetBoolean(go, "--LS"); double xmin = esl_opt_IsOn(go, "--XL") ? esl_opt_GetReal(go, "--XL") : mu; double xmax = esl_opt_IsOn(go, "--XH") ? esl_opt_GetReal(go, "--XH") : mu+40*(1./lambda); double xstep = esl_opt_IsOn(go, "--XS") ? 
esl_opt_GetReal(go, "--XS") : 0.1; double emu, elambda, etau; int i; double x; double *data; int ndata; if (be_verbose) printf("Parametric: mu = %f lambda = %f tau = %f\n", mu, lambda, tau); if (plotfile != NULL) { if ((pfp = fopen(plotfile, "w")) == NULL) esl_fatal("Failed to open plotfile"); } for (i = 0; i < n; i++) { x = esl_sxp_Sample(r, mu, lambda, tau); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); esl_sxp_FitComplete(data, ndata, &emu, &elambda, &etau); if (be_verbose) printf("Complete data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau); if (fabs( (emu-mu)/mu ) > 0.01) esl_fatal("Error in (complete) fitted mu > 1%\n"); if (fabs( (elambda-lambda)/lambda ) > 0.10) esl_fatal("Error in (complete) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) esl_fatal("Error in (complete) fitted tau > 10%\n"); esl_sxp_FitCompleteBinned(h, &emu, &elambda, &etau); if (be_verbose) printf("Binned data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau); if (fabs( (emu-mu)/mu ) > 0.01) esl_fatal("Error in (binned) fitted mu > 1%\n"); if (fabs( (elambda-lambda)/lambda ) > 0.10) esl_fatal("Error in (binned) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) esl_fatal("Error in (binned) fitted tau > 10%\n"); if (plot_pdf) esl_sxp_Plot(pfp, mu, lambda, tau, &esl_sxp_pdf, xmin, xmax, xstep); if (plot_logpdf) esl_sxp_Plot(pfp, mu, lambda, tau, &esl_sxp_logpdf, xmin, xmax, xstep); if (plot_cdf) esl_sxp_Plot(pfp, mu, lambda, tau, &esl_sxp_cdf, xmin, xmax, xstep); if (plot_logcdf) esl_sxp_Plot(pfp, mu, lambda, tau, &esl_sxp_logcdf, xmin, xmax, xstep); if (plot_surv) esl_sxp_Plot(pfp, mu, lambda, tau, &esl_sxp_surv, xmin, xmax, xstep); if (plot_logsurv) esl_sxp_Plot(pfp, mu, lambda, tau, &esl_sxp_logsurv, xmin, xmax, xstep); if (plotfile != NULL) fclose(pfp); esl_histogram_Destroy(h); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }