int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_BG *bg = NULL; P7_HMM *hmm = NULL; P7_OPROFILE *om = NULL; int M = esl_opt_GetInteger(go, "-M"); int L = esl_opt_GetInteger(go, "-L"); fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(r)); /* Sample a random HMM and optimized profile, in amino acid alphabet. */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet"); if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model"); if (( p7_oprofile_Sample(r, abc, bg, M, L, &hmm, NULL, &om)) != eslOK) esl_fatal("failed to sample HMM and profile"); /* unit test(s) */ utest_ReadWrite(hmm, om); p7_oprofile_Destroy(om); p7_hmm_Destroy(hmm); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); fprintf(stderr, "# status = ok\n"); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = NULL; int stalling = esl_opt_GetBoolean(go, "--stall"); int my_rank; int nproc; /* The startup sequence below is designed in part to facilitate debugging. * To debug an MPI program, you start it with 'mpirun' as usual, but * with the --stall flag; this causes all processes to start, but then * wait for the developer to attach gdb's to each running process. * Example: * mpirun -n 2 ./p7_hmm_mpi_utest --stall * [pid's <pid0> and <pid1> are reported for processes 0 and 1] * in one terminal window: * gdb ./p7_hmm_mpi_utest <pid0> * [set desired breakpoint in the master] * set stalling=0 * c * and similarly in a second terminal window, for worker <pid1>. * * Additionally, we want to show the rng seed. This is so we can diagnose * failures that are rare; we can re-run the unit test until we see it fail, * get the rng seed, then reproduce exactly that failure. */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (my_rank == 0) { rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); fprintf(stderr, "# MPI nproc = %d\n", nproc); } #ifdef HAVE_GETPID /* To debug an MPI program, you attach gdb's to each process, already running; * for this, you need the pid of each process. Provide the pid's to help the * developer. */ fprintf(stderr, "# %6d = %d\n", my_rank, getpid()); #endif /* This infinite loop waits on the developer to attach gdb's to each process. * In each gdb, developer may set a better breakpoint, then does "set stalling=0" * and "continue" to release the waiting process from this stall point. 
*/ while (stalling); utest_SendRecv(rng, my_rank, nproc); if (my_rank == 0) { fprintf(stderr, "# status = ok\n"); esl_randomness_Destroy(rng); } MPI_Finalize(); esl_getopts_Destroy(go); exit(0); /* success */ }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; ESL_RANDOMNESS *r = NULL; int be_verbose; go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { esl_usage(stdout, argv[0], usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ exit(0); } if (esl_opt_ArgNumber(go) != 0) { printf("Incorrect number of command line arguments.\n"); esl_usage(stdout, argv[0], usage); exit(1); } be_verbose = esl_opt_GetBoolean(go, "-v"); if (esl_opt_GetBoolean(go, "-r")) { r = esl_randomness_CreateTimeseeded(); if (be_verbose) printf("seed = %ld\n", esl_randomness_GetSeed(r)); } else r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
/* utest_odds()
 *
 * Accuracy check for the vector esl_sse_logf() and esl_sse_expf() on odds
 * ratios, which is their main intended use.
 *
 * For each of <-N> trials, two positive uniform deviates are drawn from <r>
 * and their ratio is pushed through esl_sse_logf(), and that result through
 * esl_sse_expf(). Element 0 of each vector result is compared against the
 * scalar log()/exp() value using a symmetric relative error,
 * 2|a-b| / |a+b| (defined as 0 when both values are 0).
 *
 * <-v> prints a summary; <--vv> prints one line per trial. The test fails
 * via esl_fatal() if an average error exceeds 1e-8 or a maximum error
 * exceeds 1e-6.
 */
static void
utest_odds(ESL_GETOPTS *go, ESL_RANDOMNESS *r)
{
  int    ntrials      = esl_opt_GetInteger(go, "-N");
  int    show_summary = esl_opt_GetBoolean(go, "-v");
  int    show_trials  = esl_opt_GetBoolean(go, "--vv");
  union { __m128 v; float x[4]; } vlog;      /* vector log of the odds ratio   */
  union { __m128 v; float x[4]; } vexp;      /* vector exp of that log         */
  float  numer, denom, ratio;
  float  ref_log, ref_exp;                   /* scalar reference values        */
  double log_err, log_maxerr = 0.0, log_avgerr = 0.0;
  double exp_err, exp_maxerr = 0.0, exp_avgerr = 0.0;
  int    trial;

  for (trial = 0; trial < ntrials; trial++)
    {
      numer = esl_rnd_UniformPositive(r);
      denom = esl_rnd_UniformPositive(r);
      ratio = numer / denom;

      if (ratio == 0.0) esl_fatal("whoa, odds ratio can't be 0!\n");

      /* log(): vector result vs. scalar reference */
      vlog.v  = esl_sse_logf(_mm_set1_ps(ratio));
      ref_log = log(ratio);
      if (vlog.x[0] == 0. && ref_log == 0.)
        log_err = 0.0;
      else
        log_err = 2 * fabs(vlog.x[0] - ref_log) / fabs(vlog.x[0] + ref_log);

      if (log_err > log_maxerr) log_maxerr = log_err;
      log_avgerr += log_err / (float) ntrials;
      if (isnan(log_avgerr)) esl_fatal("whoa, what?\n");

      /* exp(): map the vector log back to an odds ratio */
      vexp.v  = esl_sse_expf(vlog.v);
      ref_exp = exp(vlog.x[0]);
      if (vexp.x[0] == 0. && ref_exp == 0.)
        exp_err = 0.0;
      else
        exp_err = 2 * fabs(vexp.x[0] - ref_exp) / fabs(vexp.x[0] + ref_exp);

      if (exp_err > exp_maxerr) exp_maxerr = exp_err;
      exp_avgerr += exp_err / (float) ntrials;

      if (show_trials)
        printf("%13.7g  %13.7g  %13.7g  %13.7g  %13.7g  %13.7g  %13.7g\n",
               ratio, ref_log, vlog.x[0], ref_exp, vexp.x[0], log_err, exp_err);
    }

  if (show_summary)
    {
      printf("Average [max] logf() relative error in %d odds trials:  %13.8g  [%13.8g]\n", ntrials, log_avgerr, log_maxerr);
      printf("Average [max] expf() relative error in %d odds trials:  %13.8g  [%13.8g]\n", ntrials, exp_avgerr, exp_maxerr);
      printf("(random seed : %" PRIu32 ")\n", esl_randomness_GetSeed(r));
    }

  /* Tolerances: average relative error 1e-8, worst case 1e-6. */
  if (log_avgerr > 1e-8) esl_fatal("average error on logf() is intolerable\n");
  if (log_maxerr > 1e-6) esl_fatal("maximum error on logf() is intolerable\n");
  if (exp_avgerr > 1e-8) esl_fatal("average error on expf() is intolerable\n");
  if (exp_maxerr > 1e-6) esl_fatal("maximum error on expf() is intolerable\n");
}
/* utest_SendRecv()
 *
 * Round-trip test of p7_hmm_mpi_Send() / p7_hmm_mpi_Recv().
 *
 * Rank 0 (the master) sends its RNG seed to every other rank, samples an HMM
 * from <rng>, then receives one HMM per worker; each received HMM must
 * validate and must compare equal (tolerance 0.001) to the master's own.
 * Every other rank (a worker) receives the seed, builds its own RNG from it,
 * samples an HMM, and sends that HMM to rank 0.
 *
 * <rng> is only read on rank 0 and remains owned by the caller; workers
 * overwrite their local <rng> with a private generator and destroy it.
 * Any failure terminates through esl_fatal().
 */
static void
utest_SendRecv(ESL_RANDOMNESS *rng, int my_rank, int nproc)
{
  char          msg[]    = "utest_SendRecv() failed";
  ESL_ALPHABET *abc      = esl_alphabet_Create(eslAMINO);
  P7_HMM       *own_hmm  = NULL;     /* HMM sampled by this process           */
  P7_HMM       *got_hmm  = NULL;     /* HMM received from a worker (master)   */
  char         *workbuf  = NULL;     /* buffer handed to the MPI send/recv    */
  int           worklen  = 0;
  int           sample_M = 200;      /* passed as <M> to p7_modelsample()     */
  uint32_t      seed;
  MPI_Status    recv_status;
  char          errmsg[eslERRBUFSIZE];
  int           w;

  if (my_rank != 0)
    {
      /* Worker: obtain the master's seed so both sides draw the same numbers. */
      if (MPI_Recv(&seed, 1, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, &recv_status) != MPI_SUCCESS)
        esl_fatal(msg);

      /* A private RNG with that seed. */
      rng = esl_randomness_CreateFast(seed);

      /* Sample the HMM; the master samples its own copy from the same seed. */
      if (p7_modelsample(rng, sample_M, abc, &own_hmm) != eslOK)
        esl_fatal(msg);

      /* Ship it to the master. Each worker sends the same HMM; the test is
       * intended for one master and one worker.
       */
      if (p7_hmm_mpi_Send(own_hmm, 0, 0, MPI_COMM_WORLD, &workbuf, &worklen) != eslOK)
        esl_fatal(msg);

      /* The worker's RNG is its own; the master's stays with the caller. */
      esl_randomness_Destroy(rng);
    }
  else
    {
      /* Master: broadcast our seed to each worker in turn. */
      seed = esl_randomness_GetSeed(rng);
      for (w = 1; w < nproc; w++)
        {
          if (MPI_Send(&seed, 1, MPI_UNSIGNED, w, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
            esl_fatal(msg);
        }

      /* Sample the reference HMM that each worker's HMM must match. */
      if (p7_modelsample(rng, sample_M, abc, &own_hmm) != eslOK)
        esl_fatal(msg);

      /* Collect one HMM per worker, from whichever worker answers next. */
      for (w = 1; w < nproc; w++)
        {
          if (p7_hmm_mpi_Recv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &workbuf, &worklen, &abc, &got_hmm) != eslOK)
            esl_fatal(msg);

          if (p7_hmm_Validate(got_hmm, errmsg, 0.001) != eslOK)
            esl_fatal("%s:\n   %s", msg, errmsg);

          if (p7_hmm_Compare(own_hmm, got_hmm, 0.001) != eslOK)
            esl_fatal(msg);

          p7_hmm_Destroy(got_hmm);
        }
    }

  p7_hmm_Destroy(own_hmm);
  esl_alphabet_Destroy(abc);
  free(workbuf);
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); int N = esl_opt_GetInteger(go, "-N"); P7_TOPHITS *h1 = NULL; P7_TOPHITS *h2 = NULL; P7_TOPHITS *h3 = NULL; char name[] = "not_unique_name"; char acc[] = "not_unique_acc"; char desc[] = "Test description for the purposes of making the test driver allocate space"; double key; int i; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(r)); h1 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); h2 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); h3 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC); for (i = 0; i < N; i++) { key = esl_random(r); tophits_Add(h1, name, acc, desc, key); key = 10.0 * esl_random(r); tophits_Add(h2, name, acc, desc, key); key = 0.1 * esl_random(r); tophits_Add(h3, name, acc, desc, key); } tophits_Add(h1, "last", NULL, NULL, -1.0); tophits_Add(h1, "first", NULL, NULL, 20.0); p7_tophits_SortBySortkey(h1); if (strcmp(h1->hit[0]->name, "first") != 0) esl_fatal("sort failed (top is %s = %f)", h1->hit[0]->name, h1->hit[0]->sortkey); if (strcmp(h1->hit[N+1]->name, "last") != 0) esl_fatal("sort failed (last is %s = %f)", h1->hit[N+1]->name, h1->hit[N+1]->sortkey); p7_tophits_Merge(h1, h2); if (strcmp(h1->hit[0]->name, "first") != 0) esl_fatal("after merge 1, sort failed (top is %s = %f)", h1->hit[0]->name, h1->hit[0]->sortkey); if (strcmp(h1->hit[2*N+1]->name, "last") != 0) esl_fatal("after merge 1, sort failed (last is %s = %f)", h1->hit[2*N+1]->name, h1->hit[2*N+1]->sortkey); p7_tophits_Merge(h3, h1); if (strcmp(h3->hit[0]->name, "first") != 0) esl_fatal("after merge 2, sort failed (top is %s = %f)", h3->hit[0]->name, h3->hit[0]->sortkey); if (strcmp(h3->hit[3*N+1]->name, "last") != 0) esl_fatal("after merge 2, sort failed (last is %s = %f)", h3->hit[3*N+1]->name, h3->hit[3*N+1]->sortkey); if 
(p7_tophits_GetMaxNameLength(h3) != strlen(name)) esl_fatal("GetMaxNameLength() failed"); p7_tophits_Destroy(h1); p7_tophits_Destroy(h2); p7_tophits_Destroy(h3); esl_randomness_Destroy(r); esl_getopts_Destroy(go); fprintf(stderr, "# status = ok\n"); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); int be_verbose = esl_opt_GetBoolean(go, "-v"); if (be_verbose) printf("p7_bg unit test: rng seed %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_ReadWrite(rng); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_sampling(rng); fprintf(stderr, "# status = ok\n"); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); exit(0); /* success */ }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); int be_verbose = esl_opt_GetBoolean(go, "-v"); int N = esl_opt_GetInteger(go, "-N"); if (be_verbose) printf("seed = %" PRIu32 "\n", esl_randomness_GetSeed(r)); utest_LogGamma(r, N, be_verbose); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); int M = 50; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_generation (rng, M, abc, 10); // test a bunch of seqs to try to make sure we exercise exact domain score recalculation utest_singlemulti(rng, M, abc, 10); fprintf(stderr, "# status = ok\n"); esl_alphabet_Destroy(abc); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_ReadWrite(rng); utest_alphabet_config(eslAMINO); utest_alphabet_config(eslDNA); utest_alphabet_config(eslRNA); utest_alphabet_config(eslCOINS); utest_alphabet_config(eslDICE); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); fprintf(stderr, "# status = ok\n"); return 0; }
/* Function: p7_Calibrate() * Synopsis: Calibrate the E-value parameters of a model. * Incept: SRE, Thu Dec 25 09:29:31 2008 [Magallon] * * Purpose: Calibrate the E-value parameters of a model with * one calculation ($\lambda$) and two brief simulations * (Viterbi $\mu$, Forward $\tau$). * * Args: hmm - HMM to be calibrated * cfg_b - OPTCFG: ptr to optional build configuration; * if <NULL>, use default parameters. * byp_rng - BYPASS optimization: pass ptr to <ESL_RANDOMNESS> generator * if already known; * <*byp_rng> == NULL> if <rng> return is desired; * pass <NULL> to use and discard internal default. * byp_bg - BYPASS optimization: pass ptr to <P7_BG> if already known; * <*byp_bg == NULL> if <bg> return is desired; * pass <NULL> to use and discard internal default. * byp_gm - BYPASS optimization: pass ptr to <gm> profile if already known; * pass <*byp_gm == NULL> if <gm> return desired; * pass <NULL> to use and discard internal default. * byp_om - BYPASS optimization: pass ptr to <om> profile if already known; * pass <*byp_om == NULL> if <om> return desired; * pass <NULL> to use and discard internal default. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINVAL> if <hmm>, <gm>, <om> aren't compatible somehow. * * Xref: J4/41 */ int p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om) { P7_BG *bg = (esl_byp_IsProvided(byp_bg) ? *byp_bg : NULL); P7_PROFILE *gm = (esl_byp_IsProvided(byp_gm) ? *byp_gm : NULL); P7_OPROFILE *om = (esl_byp_IsProvided(byp_om) ? *byp_om : NULL); ESL_RANDOMNESS *r = (esl_byp_IsProvided(byp_rng) ? *byp_rng : NULL); char *errbuf = ((cfg_b != NULL) ? cfg_b->errbuf : NULL); int EmL = ((cfg_b != NULL) ? cfg_b->EmL : 200); int EmN = ((cfg_b != NULL) ? cfg_b->EmN : 200); int EvL = ((cfg_b != NULL) ? cfg_b->EvL : 200); int EvN = ((cfg_b != NULL) ? cfg_b->EvN : 200); int EfL = ((cfg_b != NULL) ? 
cfg_b->EfL : 100); int EfN = ((cfg_b != NULL) ? cfg_b->EfN : 200); double Eft = ((cfg_b != NULL) ? cfg_b->Eft : 0.04); double lambda, mmu, vmu, tau; int status; /* Configure any objects we need * that weren't already passed to us as a bypass optimization */ if (r == NULL) { if ((r = esl_randomness_CreateFast(42)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create RNG"); } else if (cfg_b != NULL && cfg_b->do_reseeding) { esl_randomness_Init(r, esl_randomness_GetSeed(r)); } if (bg == NULL) { if ((bg = p7_bg_Create(hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate background"); } /* there's an odd case where the <om> is provided and a <gm> isn't going to be returned * where we don't need a <gm> at all, and <gm> stays <NULL> after the next block. * Note that the <EvL> length in the ProfileConfig doesn't matter; the individual * calibration routines MSVMu(), etc. contain their own length reconfig calls. */ if ((esl_byp_IsInternal(byp_gm) && ! esl_byp_IsProvided(byp_om)) || esl_byp_IsReturned(byp_gm)) { if ( (gm = p7_profile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate profile"); if ( (status = p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL)) != eslOK) ESL_XFAIL(status, errbuf, "failed to configure profile"); } if (om == NULL) { if ((om = p7_oprofile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create optimized profile"); if ((status = p7_oprofile_Convert(gm, om)) != eslOK) ESL_XFAIL(status, errbuf, "failed to convert to optimized profile"); } /* The calibration steps themselves */ if ((status = p7_Lambda(hmm, bg, &lambda)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine lambda"); if ((status = p7_MSVMu (r, om, bg, EmL, EmN, lambda, &mmu)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine msv mu"); if ((status = p7_ViterbiMu(r, om, bg, EvL, EvN, lambda, &vmu)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine vit mu"); if ((status = p7_Tau (r, om, bg, EfL, EfN, 
lambda, Eft, &tau)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine fwd tau"); /* Store results */ hmm->evparam[p7_MLAMBDA] = om->evparam[p7_MLAMBDA] = lambda; hmm->evparam[p7_VLAMBDA] = om->evparam[p7_VLAMBDA] = lambda; hmm->evparam[p7_FLAMBDA] = om->evparam[p7_FLAMBDA] = lambda; hmm->evparam[p7_MMU] = om->evparam[p7_MMU] = mmu; hmm->evparam[p7_VMU] = om->evparam[p7_VMU] = vmu; hmm->evparam[p7_FTAU] = om->evparam[p7_FTAU] = tau; hmm->flags |= p7H_STATS; if (gm != NULL) { gm->evparam[p7_MLAMBDA] = lambda; gm->evparam[p7_VLAMBDA] = lambda; gm->evparam[p7_FLAMBDA] = lambda; gm->evparam[p7_MMU] = mmu; gm->evparam[p7_VMU] = vmu; gm->evparam[p7_FTAU] = tau; } if (byp_rng != NULL) *byp_rng = r; else esl_randomness_Destroy(r); /* bypass convention: no-op if rng was provided.*/ if (byp_bg != NULL) *byp_bg = bg; else p7_bg_Destroy(bg); /* bypass convention: no-op if bg was provided. */ if (byp_gm != NULL) *byp_gm = gm; else p7_profile_Destroy(gm); /* bypass convention: no-op if gm was provided. */ if (byp_om != NULL) *byp_om = om; else p7_oprofile_Destroy(om); /* bypass convention: no-op if om was provided. */ return eslOK; ERROR: if (! esl_byp_IsProvided(byp_rng)) esl_randomness_Destroy(r); if (! esl_byp_IsProvided(byp_bg)) p7_bg_Destroy(bg); if (! esl_byp_IsProvided(byp_gm)) p7_profile_Destroy(gm); if (! esl_byp_IsProvided(byp_om)) p7_oprofile_Destroy(om); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); double mu = esl_opt_GetReal (go, "-m"); double lambda = esl_opt_GetReal (go, "-l"); double tau = esl_opt_GetReal (go, "-t"); int n = esl_opt_GetInteger(go, "-n"); double binwidth = esl_opt_GetReal (go, "-w"); int plot_cdf = esl_opt_GetBoolean(go, "--cdf"); int plot_logcdf = esl_opt_GetBoolean(go, "--logcdf"); int plot_pdf = esl_opt_GetBoolean(go, "--pdf"); int plot_logpdf = esl_opt_GetBoolean(go, "--logpdf"); int plot_surv = esl_opt_GetBoolean(go, "--surv"); int plot_logsurv = esl_opt_GetBoolean(go, "--logsurv"); int be_verbose = esl_opt_GetBoolean(go, "-v"); char *plotfile = esl_opt_GetString (go, "-o"); ESL_HISTOGRAM *h = NULL; int xmin_set = esl_opt_IsOn(go, "--xL"); double xmin = xmin_set ? esl_opt_GetReal(go, "--xL") : mu; int xmax_set = esl_opt_IsOn(go, "--xH"); double xmax = xmax_set ? esl_opt_GetReal(go, "--xH") : mu+40*(1./lambda); int xstep_set = esl_opt_IsOn(go, "--xH"); double xstep = xstep_set ? 
esl_opt_GetReal(go, "--xS") : 0.1; FILE *pfp = stdout; double emu, elambda, etau; int i; double x; double *data; int ndata; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); if (be_verbose) printf("Parametric: mu = %f lambda = %f tau = %f\n", mu, lambda, tau); h = esl_histogram_CreateFull(mu, 100., binwidth); if (plotfile && (pfp = fopen(plotfile, "w")) == NULL) ESL_EXCEPTION(eslFAIL, "Failed to open plotfile"); for (i = 0; i < n; i++) { x = esl_wei_Sample(rng, mu, lambda, tau); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); esl_wei_FitComplete(data, ndata, &emu, &elambda, &etau); if (be_verbose) printf("Complete data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau); if (fabs( (emu-mu)/mu ) > 0.01) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted mu > 1%\n"); if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted tau > 10%\n"); esl_wei_FitCompleteBinned(h, &emu, &elambda, &etau); if (be_verbose) printf("Binned data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau); if (fabs( (emu-mu)/mu ) > 0.01) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted mu > 1%\n"); if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted lambda > 10%\n"); if (fabs( (etau-tau)/tau ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted lambda > 10%\n"); if (plot_pdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_pdf, xmin, xmax, xstep); if (plot_logpdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logpdf, xmin, xmax, xstep); if (plot_cdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_cdf, xmin, xmax, xstep); if (plot_logcdf) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logcdf, xmin, xmax, xstep); if (plot_surv) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_surv, xmin, xmax, xstep); if (plot_logsurv) esl_wei_Plot(pfp, 
mu, lambda, tau, &esl_wei_logsurv, xmin, xmax, xstep); if (plotfile) fclose(pfp); esl_histogram_Destroy(h); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); fprintf(stderr, "# status = ok\n"); return 0; }
/* glocal_region_trace_ensemble()
 * EPN, Tue Oct  5 10:13:25 2010
 *
 * Based on p7_domaindef.c's region_trace_ensemble(). Modified so that
 * generic matrices (which can be used for glocally configured models)
 * can be used. An additional parameter <do_null2> has been added,
 * so that null2-related calculations are only done if necessary.
 * That is, they're skipped if null2 has been turned off in the pipeline.
 *
 * Args:     ddef     - heuristic parameters, RNG <ddef->r>, and working space
 *                      (<ddef->sp>, <ddef->tr>, <ddef->n2sc>) for the calculation
 *           gm       - generic profile used for the stochastic tracebacks
 *           dsq      - digital sequence; the region is <dsq[ireg..jreg]>
 *           ireg     - first position of the region
 *           jreg     - last position of the region
 *           fwd      - Forward matrix, passed to p7_GStochasticTrace()
 *           wrk      - workspace matrix, passed to p7_GNull2_ByTrace();
 *                      only touched when <do_null2> is set
 *           do_null2 - if nonzero, accumulate and compute per-residue null2 scores
 *           ret_nc   - RETURN: number of clusters kept after removing
 *                      dominated ones
 *
 * Returns:  <eslOK> always; the return values of the routines called here
 *           are not checked.
 *
 * Notes from p7_domaindef.c::region_trace_ensemble():
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * SRE, Fri Feb  8 11:49:44 2008 [Janelia]
 *
 * Here, we've decided that region <ireg>..<jreg> in sequence <dsq> might be
 * composed of more than one domain, and we're going to use clustering
 * of a posterior ensemble of stochastic tracebacks to sort it out.
 *
 * Caller provides a filled Forward matrix in <fwd> for the sequence
 * region <dsq+ireg-1>, length <jreg-ireg+1>, for the model <om>
 * (here: the generic profile <gm>) configured in multihit mode with
 * its target length distribution set to the total length of <dsq>:
 * i.e., the same model configuration used to score the complete
 * sequence (if it weren't multihit, we wouldn't be worried about
 * multiple domains).
 *
 * Caller also provides a DP matrix in <wrk> containing at least one
 * row, for use as temporary workspace. (This will typically be the
 * caller's Backwards matrix, which we haven't yet used at this point
 * in the processing pipeline.)
 *
 * Caller provides <ddef>, which defines heuristic parameters that
 * control the clustering, and provides working space for the
 * calculation and the answers. The <ddef->sp> object must already
 * have been Reuse()'d (i.e., it needs to be fresh; we're going to use
 * it here); the caller needs to Reuse() it specifically, because it
 * can't just Reuse() the whole <ddef>, when it's in the process of
 * analyzing regions.
 *
 * Upon return, <*ret_nc> contains the number of clusters that were
 * defined.
 *
 * The caller can retrieve info on each cluster by calling
 * <p7_spensemble_GetClusterCoords(ddef->sp...)> on the
 * <P7_SPENSEMBLE> object in <ddef>.
 *
 * Other information on what's happened in working memory:
 *
 * <ddef->n2sc[ireg..jreg]> now contains log f'(x_i) / f(x_i) null2 scores
 *    for each residue (if <do_null2> is off, these entries are left at 0.0).
 *
 * <ddef->sp> gets filled in, and upon return, it's holding the answers
 *    (the cluster definitions). When the caller is done retrieving those
 *    answers, it needs to <esl_spensemble_Reuse()> it before calling
 *    <region_trace_ensemble()> again.
 *
 * <ddef->tr> is used as working memory for sampled traces.
 *
 * <wrk> has had its zero row clobbered as working space for a null2 calculation.
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
static int glocal_region_trace_ensemble(P7_DOMAINDEF *ddef, const P7_PROFILE *gm, const ESL_DSQ *dsq, int ireg, int jreg, const P7_GMX *fwd, P7_GMX *wrk, int do_null2, int *ret_nc)
{
  int Lr = jreg-ireg+1;       /* length of the region */
  int t, d, d2;               /* indices: sampled trace; domain/cluster; second cluster */
  int nov, n;                 /* overlap between two clusters; length of the shorter one */
  int nc;                     /* number of clusters reported by the clustering step */
  int pos;                    /* running position within the region, 1..Lr */
  float null2[p7_MAXCODE];    /* per-residue null2 values filled by p7_GNull2_ByTrace() */

  esl_vec_FSet(ddef->n2sc+ireg, Lr, 0.0); /* zero the null2 scores in region */

  /* By default, we make results reproducible by forcing a reset of
   * the RNG to its originally seeded state.
   */
  if (ddef->do_reseeding) esl_randomness_Init(ddef->r, esl_randomness_GetSeed(ddef->r));

  /* Collect an ensemble of sampled traces; calculate null2 odds ratios from these if nec */
  for (t = 0; t < ddef->nsamples; t++)
    {
      p7_GStochasticTrace(ddef->r, dsq+ireg-1, Lr, gm, fwd, ddef->tr);
      p7_trace_Index(ddef->tr);

      /* <pos> walks left to right across the region once per trace; it is
       * shared by the loops below, so their order matters.
       */
      pos = 1;
      for (d = 0; d < ddef->tr->ndom; d++)
        {
          /* Record this domain's coords; trace seq coords are region-relative, hence +ireg-1. */
          p7_spensemble_Add(ddef->sp, t, ddef->tr->sqfrom[d]+ireg-1, ddef->tr->sqto[d]+ireg-1, ddef->tr->hmmfrom[d], ddef->tr->hmmto[d]);

          if(do_null2)
            {
              p7_GNull2_ByTrace(gm, ddef->tr, ddef->tr->tfrom[d], ddef->tr->tto[d], wrk, null2);

              /* residues outside domains get bumped +1: because f'(x) = f(x), so f'(x)/f(x) = 1 in these segments */
              /* NOTE(review): the bound is <= sqfrom[d], so the domain's first residue
               * is also bumped by 1.0 here rather than by its null2 ratio - confirm intended.
               */
              for (; pos <= ddef->tr->sqfrom[d]; pos++) ddef->n2sc[ireg+pos-1] += 1.0;

              /* Residues inside domains get bumped by their null2 ratio */
              for (; pos <= ddef->tr->sqto[d]; pos++) ddef->n2sc[ireg+pos-1] += null2[dsq[ireg+pos-1]];
            }
        }

      if(do_null2)
        {
          /* the remaining residues in the region outside any domains get +1 */
          for (; pos <= Lr; pos++) ddef->n2sc[ireg+pos-1] += 1.0;
        }

      p7_trace_Reuse(ddef->tr);
    }

  /* Convert the accumulated n2sc[] ratios in this region to log odds null2 scores on each residue. */
  /* (Each entry is first averaged over the <ddef->nsamples> traces, then logged.) */
  if(do_null2)
    {
      for (pos = ireg; pos <= jreg; pos++) ddef->n2sc[pos] = logf(ddef->n2sc[pos] / (float) ddef->nsamples);
    }

  /* Cluster the ensemble of traces to break region into envelopes. */
  p7_spensemble_Cluster(ddef->sp, ddef->min_overlap, ddef->of_smaller, ddef->max_diagdiff, ddef->min_posterior, ddef->min_endpointp, &nc);

  /* A little hacky now. Remove "dominated" domains relative to seq coords. */
  for (d = 0; d < nc; d++) ddef->sp->assignment[d] = 0; /* overload <assignment> to flag that a domain is dominated */

  /* who dominates who? (by post prob) */
  for (d = 0; d < nc; d++)
    {
      for (d2 = d+1; d2 < nc; d2++)
        {
          /* <nov>: number of sequence positions shared by clusters d and d2. */
          nov = ESL_MIN(ddef->sp->sigc[d].j, ddef->sp->sigc[d2].j) - ESL_MAX(ddef->sp->sigc[d].i, ddef->sp->sigc[d2].i) + 1;
          /* NOTE(review): this stops scanning only when <nov> is exactly 0; disjoint
           * clusters further apart give a negative <nov> and fall through to the
           * ratio test below (which they fail) - confirm intended.
           */
          if (nov == 0) break;
          /* <n>: length of the shorter of the two clusters. */
          n = ESL_MIN(ddef->sp->sigc[d].j - ddef->sp->sigc[d].i + 1, ddef->sp->sigc[d2].j - ddef->sp->sigc[d2].i + 1);
          if ((float) nov / (float) n >= 0.8) /* overlap */
            {
              /* The cluster with the lower probability is flagged; on a tie, d is flagged. */
              if (ddef->sp->sigc[d].prob > ddef->sp->sigc[d2].prob) ddef->sp->assignment[d2] = 1;
              else ddef->sp->assignment[d] = 1;
            }
        }
    }

  /* shrink the sigc list, removing dominated domains */
  /* <d> is the write index, <d2> the read index; survivors are packed to the front. */
  d = 0;
  for (d2 = 0; d2 < nc; d2++)
    {
      if (ddef->sp->assignment[d2]) continue; /* skip domain d2, it's dominated. */
      if (d != d2) memcpy(ddef->sp->sigc + d, ddef->sp->sigc + d2, sizeof(struct p7_spcoord_s));
      d++;
    }
  ddef->sp->nc = d;
  *ret_nc = d;
  return eslOK;
}