Exemplo n.º 1
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go   = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = NULL;
  P7_BG          *bg   = NULL;
  P7_HMM         *hmm  = NULL;
  P7_OPROFILE    *om   = NULL;
  int             M    = esl_opt_GetInteger(go, "-M");
  int             L    = esl_opt_GetInteger(go, "-L");
  
  fprintf(stderr, "## %s\n", argv[0]);
  fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(r));

  /* Sample a random HMM and optimized profile, in amino acid alphabet.  */
  if ((abc = esl_alphabet_Create(eslAMINO))                    == NULL)  esl_fatal("failed to create alphabet");
  if ((bg = p7_bg_Create(abc))                                 == NULL)  esl_fatal("failed to create null model");
  if (( p7_oprofile_Sample(r, abc, bg, M, L, &hmm, NULL, &om)) != eslOK) esl_fatal("failed to sample HMM and profile");

  /* unit test(s) */
  utest_ReadWrite(hmm, om);

  p7_oprofile_Destroy(om);
  p7_hmm_Destroy(hmm);
  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);

  fprintf(stderr, "#  status = ok\n");
  return eslOK;
}
Exemplo n.º 2
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng      = NULL;
  int             stalling = esl_opt_GetBoolean(go, "--stall");
  int             my_rank;
  int             nproc;

  /* The startup sequence below is designed in part to facilitate debugging.
   * To debug an MPI program, you start it with 'mpirun' as usual, but 
   * with the --stall flag; this causes all processes to start, but then
   * wait for the developer to attach gdb's to each running process.
   * Example:
   *      mpirun -n 2 ./p7_hmm_mpi_utest --stall
   *      [pid's <pid0> and <pid1> are reported for processes 0 and 1]
   *   in one terminal window:
   *      gdb ./p7_hmm_mpi_utest <pid0>
   *        [set desired breakpoint in the master]
   *        set stalling=0
   *        c
   *   and similarly in a second terminal window, for worker <pid1>.
   *   
   * Additionally, we want to show the rng seed. This is so we can diagnose
   * failures that are rare; we can re-run the unit test until we see it fail,
   * get the rng seed, then reproduce exactly that failure.
   */ 
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  if (my_rank == 0) {
    rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
    fprintf(stderr, "## %s\n", argv[0]);
    fprintf(stderr, "#  rng seed  = %" PRIu32 "\n", esl_randomness_GetSeed(rng));
    fprintf(stderr, "#  MPI nproc = %d\n", nproc);
  }
#ifdef HAVE_GETPID
  /* To debug an MPI program, you attach gdb's to each process, already running;
   * for this, you need the pid of each process. Provide the pid's to help the
   * developer.
   */
  fprintf(stderr, "#    %6d = %d\n", my_rank, getpid()); 
#endif
  /* This infinite loop waits on the developer to attach gdb's to each process.
   * In each gdb, developer may set a better breakpoint, then does "set stalling=0"
   * and "continue" to release the waiting process from this stall point.
   */
  while (stalling);	

  utest_SendRecv(rng, my_rank, nproc);
  
  if (my_rank == 0) {
    fprintf(stderr, "#  status = ok\n");
    esl_randomness_Destroy(rng);
  }

  MPI_Finalize();
  esl_getopts_Destroy(go);
  exit(0); /* success */
}
Exemplo n.º 3
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go  = NULL;
  ESL_RANDOMNESS *r   = NULL;
  int             be_verbose;

  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK ||
      esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("%s", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    esl_usage(stdout, argv[0], usage);
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    exit(0);
  }
  if (esl_opt_ArgNumber(go) != 0) {
    printf("Incorrect number of command line arguments.\n");
    esl_usage(stdout, argv[0], usage);
    exit(1);
  }
  be_verbose = esl_opt_GetBoolean(go, "-v");

  if (esl_opt_GetBoolean(go, "-r")) {
    r = esl_randomness_CreateTimeseeded();
    if (be_verbose) printf("seed = %ld\n", esl_randomness_GetSeed(r));
  } else r = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));

  utest_LinearRegression(r, TRUE,  be_verbose);
  utest_LinearRegression(r, FALSE, be_verbose);
  
  esl_getopts_Destroy(go);
  esl_randomness_Destroy(r);
  exit(0);
}
Exemplo n.º 4
0
/* utest_odds():  test accuracy of logf, expf on odds ratios,
 * our main intended use.
 */
static void
utest_odds(ESL_GETOPTS *go, ESL_RANDOMNESS *r)
{
    int    N            = esl_opt_GetInteger(go, "-N");
    int    verbose      = esl_opt_GetBoolean(go, "-v");
    int    very_verbose = esl_opt_GetBoolean(go, "--vv");
    int    i;
    float  p1, p2, odds;
    union {
        __m128 v;
        float x[4];
    } r1;
    union {
        __m128 v;
        float x[4];
    } r2;
    float  scalar_r1, scalar_r2;
    double  err1, maxerr1 = 0.0, avgerr1 = 0.0; /* errors on logf() */
    double  err2, maxerr2 = 0.0, avgerr2 = 0.0; /* errors on expf() */

    for (i = 0; i < N; i++)
    {
        p1    = esl_rnd_UniformPositive(r);
        p2    = esl_rnd_UniformPositive(r);
        odds  = p1 / p2;

        if (odds == 0.0) esl_fatal("whoa, odds ratio can't be 0!\n");

        r1.v      = esl_sse_logf(_mm_set1_ps(odds));  /* r1.x[z] = log(p1/p2) */
        scalar_r1 = log(odds);

        err1       = (r1.x[0] == 0. && scalar_r1 == 0.) ? 0.0 : 2 * fabs(r1.x[0] - scalar_r1) / fabs(r1.x[0] + scalar_r1);
        if (err1 > maxerr1) maxerr1 = err1;
        avgerr1   += err1 / (float) N;
        if (isnan(avgerr1)) esl_fatal("whoa, what?\n");

        r2.v      = esl_sse_expf(r1.v);        /* and back to odds */
        scalar_r2 = exp(r1.x[0]);

        err2       = (r2.x[0] == 0. && scalar_r2 == 0.) ? 0.0 : 2 * fabs(r2.x[0] - scalar_r2) / fabs(r2.x[0] + scalar_r2);
        if (err2 > maxerr2) maxerr2 = err2;
        avgerr2   += err2 / (float) N;

        if (very_verbose)
            printf("%13.7g  %13.7g  %13.7g  %13.7g  %13.7g  %13.7g  %13.7g\n", odds, scalar_r1, r1.x[0], scalar_r2, r2.x[0], err1, err2);
    }

    if (verbose) {
        printf("Average [max] logf() relative error in %d odds trials:  %13.8g  [%13.8g]\n", N, avgerr1, maxerr1);
        printf("Average [max] expf() relative error in %d odds trials:  %13.8g  [%13.8g]\n", N, avgerr2, maxerr2);
        printf("(random seed : %" PRIu32 ")\n", esl_randomness_GetSeed(r));
    }

    if (avgerr1 > 1e-8) esl_fatal("average error on logf() is intolerable\n");
    if (maxerr1 > 1e-6) esl_fatal("maximum error on logf() is intolerable\n");
    if (avgerr2 > 1e-8) esl_fatal("average error on expf() is intolerable\n");
    if (maxerr2 > 1e-6) esl_fatal("maximum error on expf() is intolerable\n");
}
Exemplo n.º 5
0
static void
utest_SendRecv(ESL_RANDOMNESS *rng, int my_rank, int nproc)
{
  char            msg[] = "utest_SendRecv() failed";
  ESL_ALPHABET   *abc   = esl_alphabet_Create(eslAMINO);
  P7_HMM         *hmm   = NULL;
  P7_HMM         *xhmm  = NULL;
  int             M     = 200;
  char           *wbuf  = NULL;
  int             wn    = 0;
  int             i;
  uint32_t        rngseed;
  MPI_Status      mpistatus;
  char            errmsg[eslERRBUFSIZE];

  if (my_rank == 0) 
    {
      /* First we send our RNG seed to all workers */
      rngseed = esl_randomness_GetSeed(rng);
      for (i = 1; i < nproc; i++)
	if (MPI_Send( &rngseed, 1, MPI_UNSIGNED, i, 0, MPI_COMM_WORLD) != MPI_SUCCESS) esl_fatal(msg);

      /* We sample an HMM that's going to be identical to the workers' */
      if (p7_modelsample(rng, M, abc, &hmm) != eslOK) esl_fatal(msg);

      for (i = 1; i < nproc; i++)
	{
	  if (p7_hmm_mpi_Recv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &xhmm) != eslOK) esl_fatal(msg);

	  if (p7_hmm_Validate(xhmm, errmsg, 0.001) != eslOK) esl_fatal("%s:\n   %s", msg, errmsg);
	  if (p7_hmm_Compare(hmm, xhmm, 0.001)     != eslOK) esl_fatal(msg);

	  p7_hmm_Destroy(xhmm);
	}
    }
  else 
    {
      /* Worker(s) must first receive the exact same RNG seed that the master is using. */
      if (MPI_Recv(&rngseed, 1, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, &mpistatus) != MPI_SUCCESS) esl_fatal(msg);

      /* and then the worker(s) can create the exact same RNG (and random number sequence) that the master has */
      rng = esl_randomness_CreateFast(rngseed);

      /* so when the worker samples this HMM, the master has independently sampled an exact duplicate of it... */
      if (p7_modelsample(rng, M, abc, &hmm) != eslOK) esl_fatal(msg);

      /* each worker sends the HMM to the master (it's the same HMM for each worker. The test is intended for one master, one worker.) */
      if (p7_hmm_mpi_Send(hmm, 0, 0, MPI_COMM_WORLD, &wbuf, &wn) != eslOK) esl_fatal(msg);

      /* worker's RNG is a private copy; destroy it. Master keeps its RNG, which the caller is responsible for. */
      esl_randomness_Destroy(rng);
     }

  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  free(wbuf);
  return;
}
Exemplo n.º 6
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r        = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  int             N        = esl_opt_GetInteger(go, "-N");
  P7_TOPHITS     *h1       = NULL;
  P7_TOPHITS     *h2       = NULL;
  P7_TOPHITS     *h3       = NULL;
  char            name[]   = "not_unique_name";
  char            acc[]    = "not_unique_acc";
  char            desc[]   = "Test description for the purposes of making the test driver allocate space";
  double          key;
  int             i;

  fprintf(stderr, "## %s\n", argv[0]);
  fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(r));

  h1 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC);
  h2 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC);
  h3 = p7_tophits_Create(p7_TOPHITS_DEFAULT_INIT_ALLOC);
  
  for (i = 0; i < N; i++) 
  {
      key = esl_random(r);
      tophits_Add(h1, name, acc, desc, key);
      key = 10.0 * esl_random(r);
      tophits_Add(h2, name, acc, desc, key);
      key = 0.1 * esl_random(r);
      tophits_Add(h3, name, acc, desc, key);
  }
  tophits_Add(h1, "last",  NULL, NULL, -1.0);
  tophits_Add(h1, "first", NULL, NULL, 20.0);

  p7_tophits_SortBySortkey(h1);
  if (strcmp(h1->hit[0]->name,   "first") != 0) esl_fatal("sort failed (top is %s = %f)", h1->hit[0]->name,   h1->hit[0]->sortkey);
  if (strcmp(h1->hit[N+1]->name, "last")  != 0) esl_fatal("sort failed (last is %s = %f)", h1->hit[N+1]->name, h1->hit[N+1]->sortkey);

  p7_tophits_Merge(h1, h2);
  if (strcmp(h1->hit[0]->name,     "first") != 0) esl_fatal("after merge 1, sort failed (top is %s = %f)", h1->hit[0]->name,     h1->hit[0]->sortkey);
  if (strcmp(h1->hit[2*N+1]->name, "last")  != 0) esl_fatal("after merge 1, sort failed (last is %s = %f)", h1->hit[2*N+1]->name, h1->hit[2*N+1]->sortkey);

  p7_tophits_Merge(h3, h1);
  if (strcmp(h3->hit[0]->name,     "first") != 0) esl_fatal("after merge 2, sort failed (top is %s = %f)", h3->hit[0]->name,     h3->hit[0]->sortkey);
  if (strcmp(h3->hit[3*N+1]->name, "last")  != 0) esl_fatal("after merge 2, sort failed (last is %s = %f)", h3->hit[3*N+1]->name,     h3->hit[3*N+1]->sortkey);
  
  if (p7_tophits_GetMaxNameLength(h3) != strlen(name)) esl_fatal("GetMaxNameLength() failed");

  p7_tophits_Destroy(h1);
  p7_tophits_Destroy(h2);
  p7_tophits_Destroy(h3);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);

  fprintf(stderr, "#  status = ok\n");
  return eslOK;
}
Exemplo n.º 7
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go          = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng         = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  int             be_verbose  = esl_opt_GetBoolean(go, "-v");

  if (be_verbose) printf("p7_bg unit test: rng seed %" PRIu32 "\n", esl_randomness_GetSeed(rng));

  utest_ReadWrite(rng);

  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
Exemplo n.º 8
0
int
main(int argc, char **argv)
{
    ESL_GETOPTS    *go   = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
    ESL_RANDOMNESS *rng  = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));

    fprintf(stderr, "## %s\n", argv[0]);
    fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));

    utest_sampling(rng);

    fprintf(stderr, "#  status = ok\n");

    esl_randomness_Destroy(rng);
    esl_getopts_Destroy(go);
    exit(0); /* success */
}
Exemplo n.º 9
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go         = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r          = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
  int             be_verbose = esl_opt_GetBoolean(go, "-v");
  int             N          = esl_opt_GetInteger(go, "-N");

  if (be_verbose) printf("seed = %" PRIu32 "\n", esl_randomness_GetSeed(r));

  utest_LogGamma(r, N, be_verbose);
  utest_LinearRegression(r, TRUE,  be_verbose);
  utest_LinearRegression(r, FALSE, be_verbose);
  
  esl_getopts_Destroy(go);
  esl_randomness_Destroy(r);
  exit(0);
}
Exemplo n.º 10
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go   = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng  = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = esl_alphabet_Create(eslAMINO);
  int             M    = 50;

  fprintf(stderr, "## %s\n", argv[0]);
  fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));

  utest_generation (rng, M, abc, 10);  // test a bunch of seqs to try to make sure we exercise exact domain score recalculation
  utest_singlemulti(rng, M, abc, 10);

  fprintf(stderr, "#  status = ok\n");

  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
Exemplo n.º 11
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go          = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng         = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));

  fprintf(stderr, "## %s\n", argv[0]);
  fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));

  utest_ReadWrite(rng);

  utest_alphabet_config(eslAMINO);
  utest_alphabet_config(eslDNA);
  utest_alphabet_config(eslRNA);
  utest_alphabet_config(eslCOINS);
  utest_alphabet_config(eslDICE);

  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);

  fprintf(stderr, "#  status = ok\n");
  return 0;
}
Exemplo n.º 12
0
/* Function:  p7_Calibrate()
 * Synopsis:  Calibrate the E-value parameters of a model.
 * Incept:    SRE, Thu Dec 25 09:29:31 2008 [Magallon]
 *
 * Purpose:   Calibrate the E-value parameters of a model with 
 *            one calculation ($\lambda$) and two brief simulations
 *            (Viterbi $\mu$, Forward $\tau$).
 *            
 * Args:      hmm     - HMM to be calibrated
 *            cfg_b   - OPTCFG: ptr to optional build configuration;
 *                      if <NULL>, use default parameters.
 *            byp_rng - BYPASS optimization: pass ptr to <ESL_RANDOMNESS> generator
 *                      if already known; 
 *                      <*byp_rng> == NULL> if <rng> return is desired;
 *                      pass <NULL> to use and discard internal default.
 *            byp_bg  - BYPASS optimization: pass ptr to <P7_BG> if already known; 
 *                      <*byp_bg == NULL> if <bg> return is desired;
 *                      pass <NULL> to use and discard internal default.
 *            byp_gm  - BYPASS optimization: pass ptr to <gm> profile if already known;
 *                      pass <*byp_gm == NULL> if <gm> return desired;
 *                      pass <NULL> to use and discard internal default.
 *            byp_om  - BYPASS optimization: pass ptr to <om> profile if already known;
 *                      pass <*byp_om == NULL> if <om> return desired;
 *                      pass <NULL> to use and discard internal default.          
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEINVAL> if <hmm>, <gm>, <om> aren't compatible somehow.
 *
 * Xref:      J4/41
 */
int
p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om)
{
  P7_BG          *bg     = (esl_byp_IsProvided(byp_bg)  ? *byp_bg  : NULL); 
  P7_PROFILE     *gm     = (esl_byp_IsProvided(byp_gm)  ? *byp_gm  : NULL); 
  P7_OPROFILE    *om     = (esl_byp_IsProvided(byp_om)  ? *byp_om  : NULL); 
  ESL_RANDOMNESS *r      = (esl_byp_IsProvided(byp_rng) ? *byp_rng : NULL);
  char           *errbuf = ((cfg_b != NULL) ? cfg_b->errbuf : NULL);
  int             EmL    = ((cfg_b != NULL) ? cfg_b->EmL    : 200);
  int             EmN    = ((cfg_b != NULL) ? cfg_b->EmN    : 200);
  int             EvL    = ((cfg_b != NULL) ? cfg_b->EvL    : 200);
  int             EvN    = ((cfg_b != NULL) ? cfg_b->EvN    : 200);
  int             EfL    = ((cfg_b != NULL) ? cfg_b->EfL    : 100);
  int             EfN    = ((cfg_b != NULL) ? cfg_b->EfN    : 200);
  double          Eft    = ((cfg_b != NULL) ? cfg_b->Eft    : 0.04);
  double          lambda, mmu, vmu, tau;
  int             status;
  
  /* Configure any objects we need
   * that weren't already passed to us as a bypass optimization 
   */
  if (r  == NULL) {
    if ((r = esl_randomness_CreateFast(42)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create RNG");
  } else if (cfg_b != NULL && cfg_b->do_reseeding) {
    esl_randomness_Init(r, esl_randomness_GetSeed(r));
  }

  if (bg == NULL) {
    if ((bg = p7_bg_Create(hmm->abc)) == NULL)  ESL_XFAIL(eslEMEM, errbuf, "failed to allocate background");
  }

  /* there's an odd case where the <om> is provided and a <gm> isn't going to be returned
   * where we don't need a <gm> at all, and <gm> stays <NULL> after the next block.
   * Note that the <EvL> length in the ProfileConfig doesn't matter; the individual
   * calibration routines MSVMu(), etc. contain their own length reconfig calls.
   */
  if ((esl_byp_IsInternal(byp_gm) && ! esl_byp_IsProvided(byp_om)) || esl_byp_IsReturned(byp_gm)) {
    if  ( (gm     = p7_profile_Create(hmm->M, hmm->abc))          == NULL)  ESL_XFAIL(eslEMEM, errbuf, "failed to allocate profile");
    if  ( (status = p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL)) != eslOK) ESL_XFAIL(status,  errbuf, "failed to configure profile");
  }

  if (om == NULL) {
    if ((om     = p7_oprofile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create optimized profile");
    if ((status = p7_oprofile_Convert(gm, om))         != eslOK) ESL_XFAIL(status,  errbuf, "failed to convert to optimized profile");
  }

  /* The calibration steps themselves */
  if ((status = p7_Lambda(hmm, bg, &lambda))                          != eslOK) ESL_XFAIL(status,  errbuf, "failed to determine lambda");
  if ((status = p7_MSVMu    (r, om, bg, EmL, EmN, lambda, &mmu))      != eslOK) ESL_XFAIL(status,  errbuf, "failed to determine msv mu");
  if ((status = p7_ViterbiMu(r, om, bg, EvL, EvN, lambda, &vmu))      != eslOK) ESL_XFAIL(status,  errbuf, "failed to determine vit mu");
  if ((status = p7_Tau      (r, om, bg, EfL, EfN, lambda, Eft, &tau)) != eslOK) ESL_XFAIL(status,  errbuf, "failed to determine fwd tau");

  /* Store results */
  hmm->evparam[p7_MLAMBDA] = om->evparam[p7_MLAMBDA] = lambda;
  hmm->evparam[p7_VLAMBDA] = om->evparam[p7_VLAMBDA] = lambda;
  hmm->evparam[p7_FLAMBDA] = om->evparam[p7_FLAMBDA] = lambda;
  hmm->evparam[p7_MMU]     = om->evparam[p7_MMU]     = mmu;
  hmm->evparam[p7_VMU]     = om->evparam[p7_VMU]     = vmu;
  hmm->evparam[p7_FTAU]    = om->evparam[p7_FTAU]    = tau;
  hmm->flags              |= p7H_STATS;

  if (gm != NULL) {
    gm->evparam[p7_MLAMBDA] = lambda;
    gm->evparam[p7_VLAMBDA] = lambda;
    gm->evparam[p7_FLAMBDA] = lambda;
    gm->evparam[p7_MMU]     = mmu;
    gm->evparam[p7_VMU]     = vmu;
    gm->evparam[p7_FTAU]    = tau;
  }
    
  if (byp_rng != NULL) *byp_rng = r;  else esl_randomness_Destroy(r); /* bypass convention: no-op if rng was provided.*/
  if (byp_bg  != NULL) *byp_bg  = bg; else p7_bg_Destroy(bg);         /* bypass convention: no-op if bg was provided. */
  if (byp_gm  != NULL) *byp_gm  = gm; else p7_profile_Destroy(gm);    /* bypass convention: no-op if gm was provided. */
  if (byp_om  != NULL) *byp_om  = om; else p7_oprofile_Destroy(om);   /* bypass convention: no-op if om was provided. */
  return eslOK;

 ERROR:
  if (! esl_byp_IsProvided(byp_rng)) esl_randomness_Destroy(r);
  if (! esl_byp_IsProvided(byp_bg))  p7_bg_Destroy(bg);
  if (! esl_byp_IsProvided(byp_gm))  p7_profile_Destroy(gm);
  if (! esl_byp_IsProvided(byp_om))  p7_oprofile_Destroy(om);
  return status;
}
Exemplo n.º 13
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go   = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng  = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
  double  mu           = esl_opt_GetReal   (go, "-m");
  double  lambda       = esl_opt_GetReal   (go, "-l");
  double  tau          = esl_opt_GetReal   (go, "-t");
  int     n            = esl_opt_GetInteger(go, "-n");
  double  binwidth     = esl_opt_GetReal   (go, "-w");
  int     plot_cdf     = esl_opt_GetBoolean(go, "--cdf");
  int     plot_logcdf  = esl_opt_GetBoolean(go, "--logcdf");
  int     plot_pdf     = esl_opt_GetBoolean(go, "--pdf");
  int     plot_logpdf  = esl_opt_GetBoolean(go, "--logpdf");
  int     plot_surv    = esl_opt_GetBoolean(go, "--surv");
  int     plot_logsurv = esl_opt_GetBoolean(go, "--logsurv");
  int     be_verbose   = esl_opt_GetBoolean(go, "-v");
  char   *plotfile     = esl_opt_GetString (go, "-o");
  ESL_HISTOGRAM  *h    = NULL;
  int     xmin_set     = esl_opt_IsOn(go, "--xL");
  double  xmin         = xmin_set ? esl_opt_GetReal(go, "--xL") : mu;
  int     xmax_set     = esl_opt_IsOn(go, "--xH");
  double  xmax         = xmax_set ? esl_opt_GetReal(go, "--xH") : mu+40*(1./lambda);
  int     xstep_set    = esl_opt_IsOn(go, "--xH");
  double  xstep        = xstep_set ? esl_opt_GetReal(go, "--xS") : 0.1;
  FILE   *pfp          = stdout;
  double  emu, elambda, etau;
  int     i;
  double  x;
  double *data;
  int     ndata;

  fprintf(stderr, "## %s\n", argv[0]);
  fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));

  if (be_verbose) printf("Parametric:  mu = %f   lambda = %f    tau = %f\n", mu, lambda, tau);

  h = esl_histogram_CreateFull(mu, 100., binwidth);
  if (plotfile && (pfp = fopen(plotfile, "w")) == NULL) ESL_EXCEPTION(eslFAIL, "Failed to open plotfile");

  for (i = 0; i < n; i++)
    {
      x = esl_wei_Sample(rng, mu, lambda, tau);
      esl_histogram_Add(h, x);
    }
  esl_histogram_GetData(h, &data, &ndata);

  esl_wei_FitComplete(data, ndata, &emu, &elambda, &etau);
  if (be_verbose) printf("Complete data fit:  mu = %f   lambda = %f   tau = %f\n", emu, elambda, etau);
  if (fabs( (emu-mu)/mu ) > 0.01)             ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted mu > 1%\n");
  if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted lambda > 10%\n");
  if (fabs( (etau-tau)/tau ) > 0.10)          ESL_EXCEPTION(eslFAIL, "Error in (complete) fitted tau > 10%\n");

  esl_wei_FitCompleteBinned(h, &emu, &elambda, &etau);
  if (be_verbose)    printf("Binned data fit:  mu = %f   lambda = %f   tau = %f\n", emu, elambda, etau);
  if (fabs( (emu-mu)/mu ) > 0.01)             ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted mu > 1%\n");
  if (fabs( (elambda-lambda)/lambda ) > 0.10) ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted lambda > 10%\n");
  if (fabs( (etau-tau)/tau ) > 0.10)          ESL_EXCEPTION(eslFAIL, "Error in (binned) fitted lambda > 10%\n");

  if (plot_pdf)     esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_pdf,     xmin, xmax, xstep);
  if (plot_logpdf)  esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logpdf,  xmin, xmax, xstep);
  if (plot_cdf)     esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_cdf,     xmin, xmax, xstep);
  if (plot_logcdf)  esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logcdf,  xmin, xmax, xstep);
  if (plot_surv)    esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_surv,    xmin, xmax, xstep);
  if (plot_logsurv) esl_wei_Plot(pfp, mu, lambda, tau, &esl_wei_logsurv, xmin, xmax, xstep);

  if (plotfile) fclose(pfp);
  esl_histogram_Destroy(h);
  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);

  fprintf(stderr, "#  status = ok\n");
  return 0;
}
Exemplo n.º 14
0
/* glocal_region_trace_ensemble()
 * EPN, Tue Oct  5 10:13:25 2010
 *
 * Based on p7_domaindef.c's region_trace_ensemble(). Modified so that
 * generic matrices (which can be used for glocally configured models)
 * can be used. An additional parameter <do_null2> has been added,
 * so that null2-related calculations are only done if necessary.
 * That is, they're skipped if null2 has been turned off in the pipeline.
 * 
 * Notes from p7_domaindef.c::region_trace_ensemble():
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * SRE, Fri Feb  8 11:49:44 2008 [Janelia]
 *
 * Here, we've decided that region <ireg>..<jreg> in sequence <dsq> might be
 * composed of more than one domain, and we're going to use clustering
 * of a posterior ensemble of stochastic tracebacks to sort it out.
 * 
 * Caller provides a filled Forward matrix in <fwd> for the sequence
 * region <dsq+ireg-1>, length <jreg-ireg+1>, for the model <om>
 * configured in multihit mode with its target length distribution
 * set to the total length of <dsq>: i.e., the same model
 * configuration used to score the complete sequence (if it weren't
 * multihit, we wouldn't be worried about multiple domains).
 * 
 * Caller also provides a DP matrix in <wrk> containing at least one
 * row, for use as temporary workspace. (This will typically be the
 * caller's Backwards matrix, which we haven't yet used at this point
 * in the processing pipeline.)
 * 
 * Caller provides <ddef>, which defines heuristic parameters that
 * control the clustering, and provides working space for the
 * calculation and the answers. The <ddef->sp> object must have been
 * reused (i.e., it needs to be fresh; we're going to use it here);
 * the caller needs to Reuse() it specifically, because it can't just
 * Reuse() the whole <ddef>, when it's in the process of analyzing
 * regions.
 * 
 * Upon return, <*ret_nc> contains the number of clusters that were
 * defined.
 * 
 * The caller can retrieve info on each cluster by calling
 * <p7_spensemble_GetClusterCoords(ddef->sp...)> on the
 * <P7_SPENSEMBLE> object in <ddef>.
 * 
 * Other information on what's happened in working memory:
 * 
 * <ddef->n2sc[ireg..jreg]> now contains log f'(x_i) / f(x_i) null2 scores
 *    for each residue.
 *
 * <ddef->sp> gets filled in, and upon return, it's holding the answers 
 *    (the cluster definitions). When the caller is done retrieving those
 *    answers, it needs to <esl_spensemble_Reuse()> it before calling
 *    <region_trace_ensemble()> again.
 *    
 * <ddef->tr> is used as working memory for sampled traces.
 *    
 * <wrk> has had its zero row clobbered as working space for a null2 calculation.
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
static int
glocal_region_trace_ensemble(P7_DOMAINDEF *ddef, const P7_PROFILE *gm, const ESL_DSQ *dsq, int ireg, int jreg, 
			     const P7_GMX *fwd, P7_GMX *wrk, int do_null2, int *ret_nc)
{
  int    Lr  = jreg-ireg+1;
  int    t, d, d2;
  int    nov, n;
  int    nc;
  int    pos;
  float  null2[p7_MAXCODE];

  esl_vec_FSet(ddef->n2sc+ireg, Lr, 0.0); /* zero the null2 scores in region */

  /* By default, we make results reproducible by forcing a reset of
   * the RNG to its originally seeded state.
   */
  if (ddef->do_reseeding) 
    esl_randomness_Init(ddef->r, esl_randomness_GetSeed(ddef->r));

  /* Collect an ensemble of sampled traces; calculate null2 odds ratios from these if nec */
  for (t = 0; t < ddef->nsamples; t++)
    {
      p7_GStochasticTrace(ddef->r, dsq+ireg-1, Lr, gm, fwd, ddef->tr);
      p7_trace_Index(ddef->tr);

      pos = 1;
      for (d = 0; d < ddef->tr->ndom; d++)
	{
	  p7_spensemble_Add(ddef->sp, t, ddef->tr->sqfrom[d]+ireg-1, ddef->tr->sqto[d]+ireg-1, ddef->tr->hmmfrom[d], ddef->tr->hmmto[d]);
	  
	  if(do_null2) { 
	    p7_GNull2_ByTrace(gm, ddef->tr, ddef->tr->tfrom[d], ddef->tr->tto[d], wrk, null2);
	    
	    /* residues outside domains get bumped +1: because f'(x) = f(x), so f'(x)/f(x) = 1 in these segments */
	    for (; pos <= ddef->tr->sqfrom[d]; pos++) ddef->n2sc[ireg+pos-1] += 1.0;
	    
	    /* Residues inside domains get bumped by their null2 ratio */
	    for (; pos <= ddef->tr->sqto[d];   pos++) ddef->n2sc[ireg+pos-1] += null2[dsq[ireg+pos-1]];
	  }
	}
      if(do_null2) { 
	/* the remaining residues in the region outside any domains get +1 */
	for (; pos <= Lr; pos++)  ddef->n2sc[ireg+pos-1] += 1.0;
      }
      p7_trace_Reuse(ddef->tr);        
    }

  /* Convert the accumulated n2sc[] ratios in this region to log odds null2 scores on each residue. */
  if(do_null2) { 
    for (pos = ireg; pos <= jreg; pos++)
      ddef->n2sc[pos] = logf(ddef->n2sc[pos] / (float) ddef->nsamples);
  }

  /* Cluster the ensemble of traces to break region into envelopes. */
  p7_spensemble_Cluster(ddef->sp, ddef->min_overlap, ddef->of_smaller, ddef->max_diagdiff, ddef->min_posterior, ddef->min_endpointp, &nc);

  /* A little hacky now. Remove "dominated" domains relative to seq coords. */
  for (d = 0; d < nc; d++) 
    ddef->sp->assignment[d] = 0; /* overload <assignment> to flag that a domain is dominated */

  /* who dominates who? (by post prob) */
  for (d = 0; d < nc; d++)
    {
      for (d2 = d+1; d2 < nc; d2++)
	{
	  nov = ESL_MIN(ddef->sp->sigc[d].j, ddef->sp->sigc[d2].j) - ESL_MAX(ddef->sp->sigc[d].i, ddef->sp->sigc[d2].i) + 1;
	  if (nov == 0) break;
	  n   = ESL_MIN(ddef->sp->sigc[d].j - ddef->sp->sigc[d].i + 1,  ddef->sp->sigc[d2].j - ddef->sp->sigc[d2].i + 1);
	  if ((float) nov / (float) n >= 0.8) /* overlap */
	    {
	      if (ddef->sp->sigc[d].prob > ddef->sp->sigc[d2].prob) ddef->sp->assignment[d2] = 1;
	      else                                                  ddef->sp->assignment[d]  = 1;
	    }
	}
    }
      
  /* shrink the sigc list, removing dominated domains */
  d = 0;
  for (d2 = 0; d2 < nc; d2++)
    {
      if (ddef->sp->assignment[d2]) continue; /* skip domain d2, it's dominated. */
      if (d != d2) memcpy(ddef->sp->sigc + d, ddef->sp->sigc + d2, sizeof(struct p7_spcoord_s));
      d++;
    }
  ddef->sp->nc = d;
  *ret_nc = d;
  return eslOK;
}