Exemple #1
0
/* Function:  p7_coords2_Sample()
 * Synopsis:  Sample random, sorted, nonoverlapping start/end segment pairs.
 *
 * Purpose:   Pick a segment count <nseg> in 1..<maxseg>, then fill <c2>
 *            with <nseg> start/end coordinate pairs drawn from 1..<L>.
 *            The pairs are produced by shuffling the numbers 1..<L> and
 *            sorting the first 2*<nseg> of them, so the coordinates are
 *            distinct, increasing, and the segments do not overlap.
 *
 * Args:      rng     - random number generator
 *            c2      - coordinate list to fill; grown to <nseg> entries
 *            maxseg  - maximum number of segments to sample
 *            L       - coordinates are drawn from 1..<L>
 *            byp_wrk - BYPASS workspace of <L> int32_t's:
 *                      <NULL> to allocate and free one internally;
 *                      <*byp_wrk == NULL> to have a new one returned;
 *                      <*byp_wrk != NULL> to reuse (and resize) the caller's.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    The status set by ESL_ALLOC()/ESL_REALLOC() on allocation
 *            failure, or any non-OK status from p7_coords2_GrowTo().
 *            On failure, an internal or to-be-returned workspace is freed;
 *            a provided workspace stays owned by the caller and <*byp_wrk>
 *            is updated in case it was moved by reallocation.
 *
 * Note:      The caller must make sure <L> >= 2*<maxseg>. This is not
 *            checked here; the sort and the copy loop below touch
 *            wrk[0..2*nseg-1] of an array of <L> elements.
 */
int
p7_coords2_Sample(ESL_RANDOMNESS *rng, P7_COORDS2 *c2, int32_t maxseg, int32_t L, int32_t **byp_wrk)
{
  int32_t *wrk  = NULL;
  int32_t  nseg = 1 + esl_rnd_Roll(rng, maxseg); /* 1..maxseg */
  int32_t  i;
  int      status;

  /* Using the bypass idiom, make sure we have a workspace for <L> coords.
   * In the Provided case the caller's buffer is resized, which may move it;
   * <*byp_wrk> is refreshed before we leave, on success and on failure.
   */
  if      (esl_byp_IsInternal(byp_wrk) ) ESL_ALLOC(wrk, sizeof(int32_t) * L);
  else if (esl_byp_IsReturned(byp_wrk) ) ESL_ALLOC(wrk, sizeof(int32_t) * L);
  else if (esl_byp_IsProvided(byp_wrk) ) { wrk = *byp_wrk; ESL_REALLOC(wrk, sizeof(int32_t) * L); }

  /* We put the numbers 1..L into the workspace <wrk>; shuffle them;
   * then sort the top nseg*2 of them. This gives us <nseg>
   * nonoverlapping start/end coords, in order.
   */
  for (i = 0; i < L; i++) wrk[i] = i+1;
  esl_vec_IShuffle(rng, wrk, L);
  esl_vec_ISortIncreasing(wrk, nseg*2);

  /* Store those randomized coords now in the data structure.
   * The grow can fail; don't write into <c2->arr> unless it succeeded.
   */
  if ((status = p7_coords2_GrowTo(c2, nseg)) != eslOK) goto ERROR;
  c2->n    = nseg;
  for (i = 0; i < nseg; i++)
    {
      c2->arr[i].n1 = wrk[i*2];
      c2->arr[i].n2 = wrk[i*2+1];
    }

  /* Using the bypass idiom, recycle workspace, if we're supposed to */
  if      (esl_byp_IsInternal(byp_wrk)) free(wrk);
  else if (esl_byp_IsReturned(byp_wrk)) *byp_wrk = wrk;
  else if (esl_byp_IsProvided(byp_wrk)) *byp_wrk = wrk;
  return eslOK;

 ERROR:
  /* Internal and Returned workspaces are ours to release (<*byp_wrk> is
   * still NULL in the Returned case). A Provided workspace belongs to the
   * caller: hand back its current address rather than freeing it.
   */
  if      (esl_byp_IsInternal(byp_wrk))                free(wrk);
  else if (esl_byp_IsReturned(byp_wrk))                free(wrk);
  else if (esl_byp_IsProvided(byp_wrk) && wrk != NULL) *byp_wrk = wrk;
  return status;
}
Exemple #2
0
/* Function:  p7_Calibrate()
 * Synopsis:  Calibrate the E-value parameters of a model.
 * Incept:    SRE, Thu Dec 25 09:29:31 2008 [Magallon]
 *
 * Purpose:   Determine the E-value parameters of <hmm>: one calculation
 *            ($\lambda$) followed by three brief simulations (MSV $\mu$,
 *            Viterbi $\mu$, Forward $\tau$). The results are stored in
 *            <hmm>, in the optimized profile, and in the generic profile
 *            if one exists; <hmm> is flagged with <p7H_STATS>.
 *
 * Args:      hmm     - HMM to be calibrated
 *            cfg_b   - OPTCFG: ptr to optional build configuration;
 *                      if <NULL>, use default parameters.
 *            byp_rng - BYPASS optimization: pass ptr to <ESL_RANDOMNESS>
 *                      generator if already known;
 *                      pass <*byp_rng == NULL> if <rng> return is desired;
 *                      pass <NULL> to use and discard internal default.
 *            byp_bg  - BYPASS optimization: pass ptr to <P7_BG> if already known;
 *                      pass <*byp_bg == NULL> if <bg> return is desired;
 *                      pass <NULL> to use and discard internal default.
 *            byp_gm  - BYPASS optimization: pass ptr to <gm> profile if already known;
 *                      pass <*byp_gm == NULL> if <gm> return is desired;
 *                      pass <NULL> to use and discard internal default.
 *            byp_om  - BYPASS optimization: pass ptr to <om> profile if already known;
 *                      pass <*byp_om == NULL> if <om> return is desired;
 *                      pass <NULL> to use and discard internal default.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEINVAL> if <hmm>, <gm>, <om> aren't compatible somehow.
 *            On any failure, every object that was not provided by the
 *            caller is destroyed before returning.
 *
 * Xref:      J4/41
 */
int
p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om)
{
  ESL_RANDOMNESS *rng    = NULL;
  P7_BG          *bg     = NULL;
  P7_PROFILE     *gm     = NULL;
  P7_OPROFILE    *om     = NULL;
  char           *errbuf = NULL;
  int             msv_L  = 200;    /* defaults, used when no <cfg_b> is given */
  int             msv_N  = 200;
  int             vit_L  = 200;
  int             vit_N  = 200;
  int             fwd_L  = 100;
  int             fwd_N  = 200;
  double          fwd_t  = 0.04;
  double          lambda, msv_mu, vit_mu, fwd_tau;
  int             need_gm;
  int             status;

  /* Pick up whatever the caller handed us through the bypass arguments. */
  if (esl_byp_IsProvided(byp_rng)) rng = *byp_rng;
  if (esl_byp_IsProvided(byp_bg))  bg  = *byp_bg;
  if (esl_byp_IsProvided(byp_gm))  gm  = *byp_gm;
  if (esl_byp_IsProvided(byp_om))  om  = *byp_om;

  /* A build configuration overrides the default simulation settings. */
  if (cfg_b != NULL)
    {
      errbuf = cfg_b->errbuf;
      msv_L  = cfg_b->EmL;
      msv_N  = cfg_b->EmN;
      vit_L  = cfg_b->EvL;
      vit_N  = cfg_b->EvN;
      fwd_L  = cfg_b->EfL;
      fwd_N  = cfg_b->EfN;
      fwd_t  = cfg_b->Eft;
    }

  /* Random number generator: reseed a provided one if the configuration
   * asks for it; otherwise create our own.
   */
  if (rng != NULL)
    {
      if (cfg_b != NULL && cfg_b->do_reseeding)
        esl_randomness_Init(rng, esl_randomness_GetSeed(rng));
    }
  else
    {
      rng = esl_randomness_CreateFast(42);
      if (rng == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create RNG");
    }

  /* Background model. */
  if (bg == NULL)
    {
      bg = p7_bg_Create(hmm->abc);
      if (bg == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate background");
    }

  /* Generic profile. We build one if the caller wants it returned, or if
   * it is internal and we have no <om> to work from. When <om> is provided
   * and <gm> is internal, no <gm> is needed and it stays <NULL>. The <vit_L>
   * length passed to the config call doesn't matter; the calibration
   * routines do their own length reconfiguration.
   */
  need_gm = esl_byp_IsReturned(byp_gm) || (esl_byp_IsInternal(byp_gm) && ! esl_byp_IsProvided(byp_om));
  if (need_gm)
    {
      gm = p7_profile_Create(hmm->M, hmm->abc);
      if (gm == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate profile");

      status = p7_ProfileConfig(hmm, bg, gm, vit_L, p7_LOCAL);
      if (status != eslOK) ESL_XFAIL(status, errbuf, "failed to configure profile");
    }

  /* Optimized profile. */
  if (om == NULL)
    {
      om = p7_oprofile_Create(hmm->M, hmm->abc);
      if (om == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create optimized profile");

      status = p7_oprofile_Convert(gm, om);
      if (status != eslOK) ESL_XFAIL(status, errbuf, "failed to convert to optimized profile");
    }

  /* The calibration steps themselves */
  status = p7_Lambda(hmm, bg, &lambda);
  if (status != eslOK) ESL_XFAIL(status, errbuf, "failed to determine lambda");

  status = p7_MSVMu(rng, om, bg, msv_L, msv_N, lambda, &msv_mu);
  if (status != eslOK) ESL_XFAIL(status, errbuf, "failed to determine msv mu");

  status = p7_ViterbiMu(rng, om, bg, vit_L, vit_N, lambda, &vit_mu);
  if (status != eslOK) ESL_XFAIL(status, errbuf, "failed to determine vit mu");

  status = p7_Tau(rng, om, bg, fwd_L, fwd_N, lambda, fwd_t, &fwd_tau);
  if (status != eslOK) ESL_XFAIL(status, errbuf, "failed to determine fwd tau");

  /* Store results: first in <om>, then copy what <om> holds into <hmm>. */
  om->evparam[p7_MLAMBDA]  = lambda;
  om->evparam[p7_VLAMBDA]  = lambda;
  om->evparam[p7_FLAMBDA]  = lambda;
  om->evparam[p7_MMU]      = msv_mu;
  om->evparam[p7_VMU]      = vit_mu;
  om->evparam[p7_FTAU]     = fwd_tau;

  hmm->evparam[p7_MLAMBDA] = om->evparam[p7_MLAMBDA];
  hmm->evparam[p7_VLAMBDA] = om->evparam[p7_VLAMBDA];
  hmm->evparam[p7_FLAMBDA] = om->evparam[p7_FLAMBDA];
  hmm->evparam[p7_MMU]     = om->evparam[p7_MMU];
  hmm->evparam[p7_VMU]     = om->evparam[p7_VMU];
  hmm->evparam[p7_FTAU]    = om->evparam[p7_FTAU];
  hmm->flags              |= p7H_STATS;

  if (gm != NULL)
    {
      gm->evparam[p7_MLAMBDA] = lambda;
      gm->evparam[p7_VLAMBDA] = lambda;
      gm->evparam[p7_FLAMBDA] = lambda;
      gm->evparam[p7_MMU]     = msv_mu;
      gm->evparam[p7_VMU]     = vit_mu;
      gm->evparam[p7_FTAU]    = fwd_tau;
    }

  /* Bypass convention: hand each object back through its bypass pointer
   * (a no-op if it was provided), or discard it if it was internal.
   */
  if (byp_rng == NULL) esl_randomness_Destroy(rng); else *byp_rng = rng;
  if (byp_bg  == NULL) p7_bg_Destroy(bg);           else *byp_bg  = bg;
  if (byp_gm  == NULL) p7_profile_Destroy(gm);      else *byp_gm  = gm;
  if (byp_om  == NULL) p7_oprofile_Destroy(om);     else *byp_om  = om;
  return eslOK;

 ERROR:
  /* Anything the caller did not provide was created here (or is still NULL). */
  if (! esl_byp_IsProvided(byp_rng)) esl_randomness_Destroy(rng);
  if (! esl_byp_IsProvided(byp_bg))  p7_bg_Destroy(bg);
  if (! esl_byp_IsProvided(byp_gm))  p7_profile_Destroy(gm);
  if (! esl_byp_IsProvided(byp_om))  p7_oprofile_Destroy(om);
  return status;
}
/* All input sources funnel through here.
 * Here, <afp> is already allocated and initialized, and the input
 * <bf> is opened successfully.
 *
 * Finishes setting up <afp>: records the alignment format (guessing it
 * if <format> is <eslMSAFILE_UNKNOWN>), settles digital vs. text mode by
 * setting <afp->abc>, and configures the format-specific input map.
 *
 * Args:      byp_abc - BYPASS alphabet: <NULL> for text mode;
 *                      <*byp_abc != NULL> for digital mode with the
 *                      caller's alphabet; <*byp_abc == NULL> for digital
 *                      mode where the alphabet is guessed, created, and
 *                      returned in <*byp_abc>.
 *            bf      - the open input buffer. Not referenced in this
 *                      function; format guessing reads <afp->bf>.
 *            format  - alignment format code, or <eslMSAFILE_UNKNOWN>
 *                      to guess it from the input.
 *            fmtd    - optional format data to copy into <afp->fmtd>;
 *                      may be <NULL>.
 *            afp     - the open MSA file object being configured.
 *
 * Returns:   <eslOK> on success.
 *            <eslENOFORMAT> if the format could not be guessed, and
 *            <eslENOALPHABET> if the alphabet could not be guessed; in
 *            both cases a message is left in <afp->errmsg>.
 *
 * Throws:    <eslEMEM> if the alphabet cannot be created;
 *            <eslEINCONCEIVABLE> if digital mode is requested but no
 *            alphabet could be set up (digital alphabet code not compiled in);
 *            <eslENOFORMAT> if <afp->format> is not a known format code;
 *            any other non-OK status from the guessing or inmap routines.
 *            On the ERROR path, an alphabet created here is destroyed,
 *            a to-be-returned <*byp_abc> is set to <NULL>, and
 *            <afp->abc> is set to <NULL>.
 */
static int
profillic_msafile_OpenBuffer(ESL_ALPHABET **byp_abc, ESL_BUFFER *bf, int format, ESLX_MSAFILE_FMTDATA *fmtd,  ESLX_MSAFILE *afp)
{
  ESL_ALPHABET        *abc       = NULL;
  int                  alphatype = eslUNKNOWN;
  int                  status;

  /* if caller provided <fmtd>, copy it into afp->fmtd */
  if (fmtd) eslx_msafile_fmtdata_Copy(fmtd, &(afp->fmtd));

  /* Determine the format */
  if (format == eslMSAFILE_UNKNOWN) 
    {
      status = eslx_msafile_GuessFileFormat(afp->bf, &format, &(afp->fmtd));
      if      (status == eslENOFORMAT) ESL_XFAIL(eslENOFORMAT, afp->errmsg, "couldn't determine alignment input format"); /* ENOFORMAT is normal failure */
      else if (status != eslOK)        goto ERROR;
    }
  afp->format = format;

  /* Determine the alphabet; set <abc>. (<abc> == NULL means text mode.)  */
  /* Note that GuessAlphabet() functions aren't allowed to use the inmap, because it isn't set yet */
#ifdef eslAUGMENT_ALPHABET
  if (byp_abc && *byp_abc)	/* Digital mode, and caller provided the alphabet */
    { 
      abc       = *byp_abc;
      alphatype = abc->type;
    } 
  else if (byp_abc)		/* Digital mode, and caller wants us to guess and create an alphabet */
    {
      status = eslx_msafile_GuessAlphabet(afp, &alphatype);
      if      (status == eslENOALPHABET) ESL_XFAIL(eslENOALPHABET, afp->errmsg, "couldn't guess alphabet (maybe try --dna/--rna/--amino if available)");
      else if (status != eslOK)          goto ERROR;
      if ( (abc = esl_alphabet_Create(alphatype))                == NULL) { status = eslEMEM; goto ERROR; }
    }    
#endif
  /* This test interacts with the #ifdef above: when the digital alphabet
   * code is compiled in, a non-NULL <byp_abc> always leaves <abc> set by
   * this point (or we've already jumped to ERROR). So digital mode requested
   * with <abc> still NULL means the block above was compiled out.
   */
  if (byp_abc && ! abc) ESL_EXCEPTION(eslEINCONCEIVABLE, "Your version of Easel does not include digital alphabet code."); 
  afp->abc = abc;	/* with afp->abc set, the inmap config functions know whether to do digital/text    */

  /* Configure the format-specific, digital or text mode character
   * input map in afp->inmap.
   * All of these must:
   *
   *    set inmap[0] to an appropriate 'unknown' character, to replace
   *       invalid input with.
   *    set ' ' to eslDSQ_IGNORE (if we're supposed to accept and skip
   *       it), or map it to a gap, or set it as eslDSQ_ILLEGAL.
   *    in digital mode, copy the abc->inmap
   *    in text mode, decide if we should accept most any
   *        non-whitespace character (isgraph()), or if the format is
   *        inherently restrictive and we should go with isalpha() +
   *        some other valid characters "_-.~*" instead.
   */
  switch (afp->format) {
  case eslMSAFILE_A2M:          status = esl_msafile_a2m_SetInmap(      afp); break;
  case eslMSAFILE_AFA:          status = esl_msafile_afa_SetInmap(      afp); break;
  case eslMSAFILE_CLUSTAL:      status = esl_msafile_clustal_SetInmap(  afp); break;
  case eslMSAFILE_CLUSTALLIKE:  status = esl_msafile_clustal_SetInmap(  afp); break;
  case eslMSAFILE_PFAM:         status = esl_msafile_stockholm_SetInmap(afp); break;
  case eslMSAFILE_PHYLIP:       status = esl_msafile_phylip_SetInmap(   afp); break;
  case eslMSAFILE_PHYLIPS:      status = esl_msafile_phylip_SetInmap(   afp); break;
  case eslMSAFILE_PSIBLAST:     status = esl_msafile_psiblast_SetInmap( afp); break;
  case eslMSAFILE_SELEX:        status = esl_msafile_selex_SetInmap(    afp); break;
  case eslMSAFILE_STOCKHOLM:    status = esl_msafile_stockholm_SetInmap(afp); break;
  case eslMSAFILE_PROFILLIC:    status = eslOK;                               break; /* TODO: status = profillic_esl_msafile_profile_SetInmap(afp); */
  default: ESL_XEXCEPTION(eslENOFORMAT, "no such alignment file format");     break;
  }
  /* Don't report success if the inmap configuration reported a failure. */
  if (status != eslOK) goto ERROR;

  if (esl_byp_IsReturned(byp_abc)) *byp_abc = abc;
  return eslOK;

 ERROR:  /* on normal errors, afp is returned in an error state */
  if (abc && ! esl_byp_IsProvided(byp_abc)) { esl_alphabet_Destroy(abc); }
  if (esl_byp_IsReturned(byp_abc)) *byp_abc = NULL;
  afp->abc = NULL;
  return status;
}