Exemple #1
0
/* Function:  esl_recorder_ResizeTo()
 * Synopsis:  Reallocate an <ESL_RECORDER> for a new <maxlines>
 * Incept:    SRE, Fri Dec 25 17:02:46 2009 [Casa de Gatos]
 *
 * Purpose:   Reallocate the <ESL_RECORDER> <rc> to have a new
 *            window size <maxlines>. 
 *            
 *            The new <maxlines> may be more or less than the previous
 *            window size for <rc>.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> if (re-)allocation fails.
 *
 *            <eslEINVAL> if the recorder has a marked line (for start
 *            of a block) and you try to shrink it so much that that
 *            marked line would be lost.
 *
 *            <eslEINCONCEIVABLE> on any baseline resetting problem;
 *            this would have to be an internal error in the module.
 * 
 * Note:      We may have to repermute the line array, and reset its
 *            baseline, as follows.
 *            
 *            In the growth case: if the line array is out of order
 *            (circularly permuted) we must straighten it out, which
 *            means resetting the baseline.  
 *            i.e. to grow 3 1 2 to nalloc=6, we need 1 2 3 x x x; 
 *            simple reallocation to 3 1 2 x x x doesn't work,
 *            next read would make 3 4 2 x x x.
 *          
 *            In the shrinkage case: if the line array is in use beyond the
 *            new array size, we set a new baseline to keep as much of the
 *            old array as possible.
 * 
 *            i.e. for 6->3  
 *            1 2 3 x x x -> 1 2 3
 *            1 2 3 4 x x -> 2 3 4 with new baseline=2.
 *            4 5 0 1 2 3 -> 3 4 5 with new baseline=3
 */
int
esl_recorder_ResizeTo(ESL_RECORDER *rc, int new_maxlines)
{
  int   idx;
  int   newbase;
  void *tmp;
  int   minlines;
  int   status;

  if (new_maxlines == rc->nalloc) return eslOK;

  if (new_maxlines > rc->nalloc) /* growth case */
    {
      if ((rc->nread - rc->baseline) / rc->nalloc != 0)	/* array is permuted; reorder it */
	{
	  newbase = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
	  status = recorder_new_baseline(rc, newbase);
	  if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
	}
    }
  else 				/* shrinkage case */
    {
      /* check that the marked line (if any) will stay in window */
      if (rc->markline >= 0)	
	{
	  minlines = rc->nread - rc->markline;
	  if (new_maxlines < minlines)
	    ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing marked line");
	}
      /* check that current line will stay in window */
      minlines = rc->nread - rc->ncurr + 1;
      if (new_maxlines < minlines)
	ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing current line");

      if (rc->nread - rc->baseline > new_maxlines) /* baseline needs to move up */
	{
	  newbase = rc->nread - new_maxlines;
	  status = recorder_new_baseline(rc, newbase);
	  if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
	}

      for (idx = new_maxlines; idx < rc->nalloc; idx++)
	if (rc->line[idx]) free(rc->line[idx]);
    }

  ESL_RALLOC(rc->line,   tmp, sizeof(char *) * new_maxlines);
  ESL_RALLOC(rc->lalloc, tmp, sizeof(int)    * new_maxlines);
  ESL_RALLOC(rc->offset, tmp, sizeof(off_t)  * new_maxlines);
  for (idx = rc->nalloc; idx < new_maxlines; idx++) /* no-op in shrinkage case */
    { 
      rc->line[idx]   = NULL;
      rc->lalloc[idx] = 0;
      rc->offset[idx] = 0;
    }
  rc->nalloc = new_maxlines;
  return eslOK;

 ERROR:
  return status;
}
Exemple #2
0
int
esl_hmx_GrowTo(ESL_HMX *mx, int L, int M)
{
  uint64_t ncells;
  void    *p;
  int      do_reset = FALSE;
  int      i;
  int      status;

  if (L < mx->allocR && M <= mx->allocM) return eslOK;

  /* Do we have to reallocate the 2D matrix, or can we get away with
   * rejiggering the row pointers into the existing memory? 
   */
  ncells = (L+1) * M;
  if (ncells > mx->ncells) 
    {
      ESL_RALLOC(mx->dp_mem, p, sizeof(float) * ncells);
      mx->ncells = ncells;
      do_reset   = TRUE;
    }

  /* must we reallocate row pointers? */
  if (L >= mx->allocR)
    {
      ESL_RALLOC(mx->dp, p, sizeof(float *) * (L+1));
      ESL_RALLOC(mx->sc, p, sizeof(float)   * (L+2));
      mx->allocR = L+1;
      mx->allocM = M;
      do_reset   = TRUE;
    }

  /* must we widen the rows? */
  if (M > mx->allocM)
    {
      mx->allocM = M;
      do_reset = TRUE;
    }

  /* must we set some more valid row pointers? */
  if (L >= mx->validR)
    do_reset = TRUE;

  /* did we trigger a relayout of row pointers? */
  if (do_reset)
    {
      mx->validR = ESL_MIN(mx->ncells / mx->allocM, mx->allocR);
      for (i = 0; i < mx->validR; i++)
	mx->dp[i] = mx->dp_mem + i*mx->allocM;
    }
  mx->M = 0;
  mx->L = 0;
  return eslOK;

 ERROR:
  return status;
}
Exemple #3
0
/* Function:  p7_gmx_GrowTo()
 * Synopsis:  Assure that DP matrix is big enough.
 *
 * Purpose:   Assures that a DP matrix <gx> is allocated
 *            for a model of size up to <M> and a sequence of
 *            length up to <L>; reallocates if necessary.
 *            
 *            This function does not respect the configured
 *            <RAMLIMIT>; it will allocate what it's told to
 *            allocate. 
 *
 * Returns:   <eslOK> on success, and <gx> may be reallocated upon
 *            return; any data that may have been in <gx> must be 
 *            assumed to be invalidated.
 *
 * Throws:    <eslEMEM> on allocation failure, and any data that may
 *            have been in <gx> must be assumed to be invalidated.
 */
int
p7_gmx_GrowTo(P7_GMX *gx, int M, int L)
{
  int      status;
  void    *p;
  int      i;
  uint64_t ncells;
  int      do_reset = FALSE;

  if (M < gx->allocW && L < gx->validR) return eslOK;
  
  /* must we realloc the 2D matrices? (or can we get away with just
   * jiggering the row pointers, if we are growing in one dimension
   * while shrinking in another?)
   */
  ncells = (uint64_t) (M+1) * (uint64_t) (L+1);
  if (ncells > gx->ncells) 
    {
      ESL_RALLOC(gx->dp_mem, p, sizeof(float) * ncells * p7G_NSCELLS);
      gx->ncells = ncells;
      do_reset   = TRUE;
    }

  /* must we reallocate the row pointers? */
  if (L >= gx->allocR)
    {
      ESL_RALLOC(gx->xmx, p, sizeof(float)   * (L+1) * p7G_NXCELLS);
      ESL_RALLOC(gx->dp,  p, sizeof(float *) * (L+1));
      gx->allocR = L+1;		/* allocW will also get set, in the do_reset block */
      do_reset   = TRUE;
    }

  /* must we widen the rows? */
  if (M >= gx->allocW) do_reset = TRUE;

  /* must we set some more valid row pointers? */
  if (L >= gx->validR) do_reset = TRUE;

  /* resize the rows and reset all the valid row pointers.*/
  if (do_reset)
    {
      gx->allocW = M+1;
      gx->validR = ESL_MIN(gx->ncells / gx->allocW, gx->allocR);
      for (i = 0; i < gx->validR; i++) 
	gx->dp[i] = gx->dp_mem + i * (gx->allocW) * p7G_NSCELLS;
    }

  gx->M      = 0;
  gx->L      = 0;
  return eslOK;

 ERROR:
  return status;
}
Exemple #4
0
/* Function:  p7_tophits_Grow()
 * Synopsis:  Reallocates a larger hit list, if needed.
 *
 * Purpose:   If list <h> cannot hold another hit, doubles
 *            the internal allocation.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure; in this case,
 *            the data in <h> are unchanged.
 */
int
p7_tophits_Grow(P7_TOPHITS *h)
{
  void     *p;
  P7_HIT   *ori    = h->unsrt;
  int       Nalloc = h->Nalloc * 2;    /* grow by doubling */
  int       i;
  int       status;

  if (h->N < h->Nalloc) return eslOK; /* we have enough room for another hit */

  if (( status = p7_hit_Grow( &(h->unsrt), h->Nalloc, Nalloc)) != eslOK) goto ERROR;
  ESL_RALLOC(h->hit,   p, sizeof(P7_HIT *) * Nalloc);

  /* If we grow a sorted list, we have to translate the pointers
   * in h->hit, because h->unsrt might have just moved in memory. 
   */
  if (h->is_sorted_by_seqidx || h->is_sorted_by_sortkey)
    {
      for (i = 0; i < h->N; i++)
        h->hit[i] = h->unsrt + (h->hit[i] - ori);
    }

  h->Nalloc = Nalloc;
  return eslOK;

 ERROR:
  return eslEMEM;
}
Exemple #5
0
/* Function:  p7_oprofile_MPIRecv()
 * Synopsis:  Receives an OPROFILE as a work unit from an MPI sender.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Receive a work unit that consists of a single OPROFILE
 *            sent by MPI <source> (<0..nproc-1>, or
 *            <MPI_ANY_SOURCE>) tagged as <tag> for MPI communicator <comm>.
 *            
 *            Work units are prefixed by a status code. If the unit's
 *            code is <eslOK> and no errors are encountered, this
 *            routine will return <eslOK> and a non-<NULL> <*ret_om>.
 *            If the unit's code is <eslEOD> (a shutdown signal), 
 *            this routine returns <eslEOD> and <*ret_om> is <NULL>.
 *   
 *            Caller provides a working buffer <*buf> of size
 *            <*nalloc> characters. These are passed by reference, so
 *            that <*buf> can be reallocated and <*nalloc> increased
 *            if necessary. As a special case, if <*buf> is <NULL> and
 *            <*nalloc> is 0, the buffer will be allocated
 *            appropriately, but the caller is still responsible for
 *            free'ing it.
 *            
 *            Caller may or may not already know what alphabet the OPROFILE
 *            is expected to be in.  A reference to the current
 *            alphabet is passed in <abc>. If the alphabet is unknown,
 *            pass <*abc = NULL>, and when the OPROFILE is received, an
 *            appropriate new alphabet object is allocated and passed
 *            back to the caller via <*abc>.  If the alphabet is
 *            already known, <*ret_abc> is that alphabet, and the new
 *            OPROFILE's alphabet type is verified to agree with it. This
 *            mechanism allows an application to let the first OPROFILE
 *            determine the alphabet type for the application, while
 *            still keeping the alphabet under the application's scope
 *            of control.
 *
 * Returns:   <eslOK> on success. <*ret_om> contains the received OPROFILE;
 *            it is allocated here, and the caller is responsible for
 *            free'ing it.  <*buf> may have been reallocated to a
 *            larger size, and <*nalloc> may have been increased.  If
 *            <*abc> was passed as <NULL>, it now points to an
 *            <ESL_ALPHABET> object that was allocated here; caller is
 *            responsible for free'ing this.
 *            
 *            Returns <eslEOD> if an end-of-data signal was received.
 *            In this case, <*buf>, <*nalloc>, and <*abc> are left unchanged,
 *            and <*ret_om> is <NULL>.
 *            
 *            Returns <eslEINCOMPAT> if the OPROFILE is in a different alphabet
 *            than <*abc> said to expect. In this case, <*abc> is unchanged,
 *            <*buf> and <*nalloc> may have been changed, and <*ret_om> is
 *            <NULL>.
 *            
 * Throws:    <eslEMEM> on allocation error, in which case <*ret_om> is 
 *            <NULL>.           
 */
int
p7_oprofile_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **abc, P7_OPROFILE **ret_om)
{
  int         status;
  int         code;
  P7_OPROFILE     *om     = NULL;
  int         n;
  int         pos;
  MPI_Status  mpistatus;

  /* Probe first, because we need to know if our buffer is big enough. */
  MPI_Probe(source, tag, comm, &mpistatus);
  MPI_Get_count(&mpistatus, MPI_PACKED, &n);

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc) {
    void *tmp;
    ESL_RALLOC(*buf, tmp, sizeof(char) * n);
    *nalloc = n; 
  }

  /* Receive the packed work unit */
  MPI_Recv(*buf, n, MPI_PACKED, source, tag, comm, &mpistatus);

  /* Unpack it, looking at the status code prefix for EOD/EOK  */
  pos = 0;
  if (MPI_Unpack(*buf, n, &pos, &code, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (code == eslEOD)  { *ret_om = NULL;  return eslEOD; }

  return p7_oprofile_MPIUnpack(*buf, *nalloc, &pos, comm, abc, ret_om);

 ERROR:
  if (om != NULL) p7_oprofile_Destroy(om);
  return status;
}
Exemple #6
0
/* Function:  esl_hmm_Emit()
 * Synopsis:  Emit a sequence from an HMM.
 *
 * Purpose:   Sample one sequence from an <hmm>, using random
 *            number generator <r>. Optionally return the sequence,
 *            the state path, and/or the length via <opt_dsq>, 
 *            <opt_path>, and <opt_L>.
 *            
 *            If <opt_dsq> or <opt_path> are requested, caller
 *            becomes responsible for free'ing their memory.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    (no abnormal error conditions)
 */
int
esl_hmm_Emit(ESL_RANDOMNESS *r, const ESL_HMM *hmm, ESL_DSQ **opt_dsq, int **opt_path, int *opt_L)
{
  int      k, L, allocL;
  int     *path = NULL;
  ESL_DSQ *dsq  = NULL;
  void    *tmp  = NULL;
  int      status;
  
  ESL_ALLOC(dsq,  sizeof(ESL_DSQ) * 256);
  ESL_ALLOC(path, sizeof(int)     * 256);
  allocL = 256;

  dsq[0]  = eslDSQ_SENTINEL;
  path[0] = -1;
  
  k = esl_rnd_FChoose(r, hmm->pi, hmm->M+1);
  L = 0;
  while (k != hmm->M)		/* M is the implicit end state */
    {
      L++;
      if (L >= allocL-1) {	/* Reallocate path and seq if needed */
	ESL_RALLOC(dsq,  tmp, sizeof(ESL_DSQ) * (allocL*2));
	ESL_RALLOC(path, tmp, sizeof(int)     * (allocL*2));
	allocL *= 2;
      }
	
      path[L] = k;
      dsq[L]  = esl_rnd_FChoose(r, hmm->e[k], hmm->abc->K);
      k       = esl_rnd_FChoose(r, hmm->t[k], hmm->M+1);
    }

  path[L+1] = hmm->M;		/* sentinel for "end state" */
  dsq[L+1]  = eslDSQ_SENTINEL;
  
  if (opt_dsq  != NULL) *opt_dsq  = dsq;   else free(dsq);
  if (opt_path != NULL) *opt_path = path;  else free(path);
  if (opt_L    != NULL) *opt_L    = L;     
  return eslOK;

 ERROR:
  if (path != NULL) free(path);
  if (dsq  != NULL) free(dsq);
  return status;
}
Exemple #7
0
/* Function:  p7_oprofile_MPISend()
 * Synopsis:  Send an OPROFILE as an MPI work unit.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Sends an OPROFILE <om> as a work unit to MPI process
 *            <dest> (where <dest> ranges from 0..<nproc-1>), tagged
 *            with MPI tag <tag>, for MPI communicator <comm>, as 
 *            the sole workunit or result. 
 *            
 *            Work units are prefixed by a status code. If <hmm> is
 *            <non-NULL>, the work unit is an <eslOK> code followed by
 *            the packed HMM. If <hmm> is NULL, the work unit is an
 *            <eslEOD> code, which <p7_hmm_MPIRecv()> knows how to
 *            interpret; this is typically used for an end-of-data
 *            signal to cleanly shut down worker processes.
 *            
 *            In order to minimize alloc/free cycles in this routine,
 *            caller passes a pointer to a working buffer <*buf> of
 *            size <*nalloc> characters. If necessary (i.e. if <hmm> is
 *            too big to fit), <*buf> will be reallocated and <*nalloc>
 *            increased to the new size. As a special case, if <*buf>
 *            is <NULL> and <*nalloc> is 0, the buffer will be
 *            allocated appropriately, but the caller is still
 *            responsible for free'ing it.
 *            
 * Returns:   <eslOK> on success; <*buf> may have been reallocated and
 *            <*nalloc> may have been increased.
 * 
 * Throws:    <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc
 *            fails. In either case, <*buf> and <*nalloc> remain valid and useful
 *            memory (though the contents of <*buf> are undefined). 
 * 
 * Note:      Compare to p7_hmmfile_WriteBinary(). The two operations (sending
 *            an HMM via MPI, or saving it as a binary file to disk) are
 *            similar.
 */
int
p7_oprofile_MPISend(P7_OPROFILE *om, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc)
{
  int   status;
  int   code;
  int   sz, n, pos;

  /* Figure out size */
  if (MPI_Pack_size(1, MPI_INT, comm, &n) != 0) ESL_XEXCEPTION(eslESYS, "mpi pack size failed");
  if (om != NULL) {
    if ((status = p7_oprofile_MPIPackSize(om, comm, &sz)) != eslOK) return status;
    n += sz;
  }

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc) {
    void *tmp;
    ESL_RALLOC(*buf, tmp, sizeof(char) * n);
    *nalloc = n; 
  }

  /* Pack the status code and OPROFILE into the buffer */
  pos  = 0;
  code = (om == NULL) ? eslEOD : eslOK;
  if (MPI_Pack(&code, 1, MPI_INT, *buf, n, &pos, comm) != 0) ESL_EXCEPTION(eslESYS, "mpi pack failed");
  if (om != NULL) {
    if ((status = p7_oprofile_MPIPack(om, *buf, n, &pos, comm)) != eslOK) return status;
  }

  /* Send the packed OPROFILE to the destination. */
  if (MPI_Send(*buf, n, MPI_PACKED, dest, tag, comm) != 0)  ESL_EXCEPTION(eslESYS, "mpi send failed");
  return eslOK;

 ERROR:
  return status;
}
Exemple #8
0
/* Function: GrowCP9Matrix()
 *
 * Purpose:  Reallocate a CP9 dp matrix, if necessary, for seq for
 *           length N, or 2 rows (if we're scanning in memory 
 *           efficient mode, in this case N == 1, nrows = N+1).
 * 
 *           Note: unlike HMMER, M never changes, so we only have
 *           to worry about increasing the number of rows if nec.
 *           
 *           Returns individual ptrs to the four matrix components
 *           as a convenience.
 *           
 *           This function allocates the requested matrix regardless
 *           of it's size.
 * 
 *           If kmin and kmax are non-NULL, the matrix will be a p7
 *           HMM banded matrix as defined by bands in kmin, kmax.
 *           In this case N must be length of the sequence. If caller
 *           wants a non-banded CP9 matrix, pass kmin = kmax = NULL.
 *
 * Args:     mx    - an already allocated matrix to grow.
 *           N     - seq length to allocate for; N+1 rows
 *           M     - size of model, contract enforces this must == mx->M
 *           kmin  - OPTIONAL: [0.1..i..N] minimum k for residue i
 *           kmax  - OPTIONAL: [0.1..i..N] maximum k for residue i
 *           mmx, imx, dmx, elmx, erow 
 *                 - RETURN: ptrs to four mx components as a convenience
 *                   
 * Return:   eslOK on success, eslEINCOMPAT if contract is violated,
 *           mx is (re)allocated here.
 */
int
GrowCP9Matrix(CP9_MX *mx, char *errbuf, int N, int M, int *kmin, int *kmax, int ***mmx, int ***imx, int ***dmx, int ***elmx, int **erow)
{
  int status;
  void *p;
  int i;
  int ncells_needed = 0;
  int do_banded;
  int cur_ncells = 0;
  int do_reallocate;

  if(mx->M != M) ESL_FAIL(eslEINCOMPAT, errbuf, "GrowCP9Matrix(), mx->M: %d != M passed in: %d\n", mx->M, M);
  if(N < 0)      ESL_FAIL(eslEINCOMPAT, errbuf, "GrowCP9Matrix(), N: %d < 0\n", N);

  do_banded = (kmin != NULL && kmax == NULL) ?  TRUE : FALSE;
  if(do_banded) { 
    for (i = 0; i <= N; i++) ncells_needed += (kmax[i] - kmin[i] + 1);
  }
  else ncells_needed = (N+1) * (M+1);
  do_reallocate = (ncells_needed <= mx->ncells_allocated) ? FALSE : TRUE;

  if(do_reallocate) { 
    /* we need more space */
    ESL_RALLOC(mx->mmx,  p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->imx,  p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->dmx,  p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->elmx, p, sizeof(int *) * (N+1)); 
    ESL_RALLOC(mx->erow, p, sizeof(int)   * (N+1));
    ESL_RALLOC(mx->mmx_mem,  p, sizeof(int) * ncells_needed);
    ESL_RALLOC(mx->imx_mem,  p, sizeof(int) * ncells_needed);
    ESL_RALLOC(mx->dmx_mem,  p, sizeof(int) * ncells_needed);
    ESL_RALLOC(mx->elmx_mem, p, sizeof(int) * ncells_needed);
    mx->ncells_allocated = ncells_needed;

    /* update size */
    mx->size_Mb =  (float) sizeof(CP9_MX);
    mx->size_Mb += (float) (sizeof(int *) * (N+1) * 4);           /* mx->*mx ptrs */
    mx->size_Mb += (float) (sizeof(int)   * (ncells_needed * 4)); /* mx->*mx_mem */
    mx->size_Mb += (float) (sizeof(int)   * (N+1));               /* mx->erow */
    mx->size_Mb /= 1000000.;
  }

  if(do_banded || do_reallocate) { /* rearrange pointers */
    mx->mmx[0]  = mx->mmx_mem;
    mx->imx[0]  = mx->imx_mem;
    mx->dmx[0]  = mx->dmx_mem;
    mx->elmx[0] = mx->elmx_mem;

    if(do_banded) { 
      cur_ncells = kmax[0] - kmin[0] + 1;
      for (i = 1; i <= N; i++) {
	mx->mmx[i] = mx->mmx[0] + cur_ncells;
	mx->imx[i] = mx->imx[0] + cur_ncells;
	mx->dmx[i] = mx->dmx[0] + cur_ncells;
	mx->elmx[i]= mx->elmx[0]+ cur_ncells;
	cur_ncells += kmax[i] - kmin[i] + 1;
      }
    }
    else { /* non-banded, we only get here if we didn't go to done, i.e. we reallocated */
      for (i = 1; i <= N; i++) {
	mx->mmx[i] = mx->mmx[0] + (i*(M+1));
	mx->imx[i] = mx->imx[0] + (i*(M+1));
	mx->dmx[i] = mx->dmx[0] + (i*(M+1));
	mx->elmx[i]= mx->elmx[0]+ (i*(M+1));
      }
    }
  }

  mx->rows = N;
  mx->kmin = kmin; /* could be NULL */
  mx->kmax = kmax; /* could be NULL */
  mx->ncells_valid = ncells_needed;
  if (mmx != NULL) *mmx = mx->mmx;
  if (imx != NULL) *imx = mx->imx;
  if (dmx != NULL) *dmx = mx->dmx;
  if (elmx!= NULL) *elmx= mx->elmx;
  if (erow != NULL) *erow = mx->erow;
  return eslOK;

 ERROR:
  ESL_FAIL(status, errbuf, ("GrowCP9Matrix(), memory reallocation error."));
}
Exemple #9
0
/* Function:  p7_omx_GrowTo()
 * Synopsis:  Assure that a DP matrix is big enough.
 * Incept:    SRE, Thu Dec 20 09:27:07 2007 [Janelia]
 *
 * Purpose:   Assures that an optimized DP matrix <ox> is allocated for
 *            a model up to <allocM> in length; if not, reallocate to
 *            make it so.
 *            
 *            Because the optimized matrix is one-row, only the model
 *            length matters; the target sequence length isn't
 *            relevant.
 *
 * Returns:   <eslOK> on success, and <gx> may be reallocated upon
 *            return; any data that may have been in <gx> must be 
 *            assumed to be invalidated.
 *
 * Throws:    <eslEMEM> on allocation failure, and any data that may
 *            have been in <gx> must be assumed to be invalidated.
 */
int
p7_omx_GrowTo(P7_OMX *ox, int allocM, int allocL, int allocXL)
{
  void  *p;
  int    nqf  = p7O_NQF(allocM);	       /* segment length; total # of striped vectors for uchar */
  int    nqw  = p7O_NQW(allocM);	       /* segment length; total # of striped vectors for float */
  int    nqb  = p7O_NQB(allocM);	       /* segment length; total # of striped vectors for float */
  size_t ncells = (allocL+1) * nqf * 4;
  int    reset_row_pointers = FALSE;
  int    i;
  int    status;
 
  /* If all possible dimensions are already satisfied, the matrix is fine */
  if (ox->allocQ4*4 >= allocM && ox->validR > allocL && ox->allocXR >= allocXL+1) return eslOK;

  /* If the main matrix is too small in cells, reallocate it; 
   * and we'll need to realign/reset the row pointers later.
   */
  if (ncells > ox->ncells)
    {
      ESL_RALLOC(ox->dp_mem, p, sizeof(vector float) * (allocL+1) * nqf * p7X_NSCELLS + 15);
      ox->ncells = ncells;
      reset_row_pointers = TRUE;
    }

  /* If the X beams are too small, reallocate them. */
  if (allocXL+1 >= ox->allocXR)
    {
      ESL_RALLOC(ox->x_mem, p,  sizeof(float) * (allocXL+1) * p7X_NXCELLS + 15); 
      ox->allocXR = allocXL+1;
      ox->xmx     = (float *) ((unsigned long int) ((char *) ox->x_mem  + 15) & (~0xf));
    }

  /* If there aren't enough rows, reallocate the row pointers; we'll
   * realign and reset them later.
   */
  if (allocL >= ox->allocR)
    {
      ESL_RALLOC(ox->dpb, p, sizeof(vector unsigned char *) * (allocL+1));
      ESL_RALLOC(ox->dpw, p, sizeof(vector signed short * ) * (allocL+1));
      ESL_RALLOC(ox->dpf, p, sizeof(vector float *)         * (allocL+1));
      ox->allocR         = allocL+1;
      reset_row_pointers = TRUE;
    }

  /* must we widen the rows? */
  if (allocM > ox->allocQ4*4)
    reset_row_pointers = TRUE;

  /* must we set some more valid row pointers? */
  if (allocL >= ox->validR)
    reset_row_pointers = TRUE;

  /* now reset the row pointers, if needed */
  if (reset_row_pointers)
    {
      ox->dpb[0] = (vector unsigned char *) ((unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf));
      ox->dpw[0] = (vector signed short *) ox->dpb[0];
      ox->dpf[0] = (vector float *) ox->dpb[0];

      ox->validR = ESL_MIN( ox->ncells / (nqf * 4), ox->allocR);
      for (i = 1; i < ox->validR; i++)
	{
	  ox->dpb[i] = ox->dpb[0] + i * nqb;
	  ox->dpw[i] = ox->dpw[0] + i * nqw * p7X_NSCELLS;
	  ox->dpf[i] = ox->dpf[0] + i * nqf * p7X_NSCELLS;
	}

      ox->allocQ4  = nqf;
      ox->allocQ8  = nqw;
      ox->allocQ16 = nqb;
    }
  
  ox->M = 0;
  ox->L = 0;
  return eslOK;

 ERROR:
  return status;
}  
Exemple #10
0
static void
mpi_worker(const ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int           xstatus = eslOK;
  int           status;
  int           type;
  P7_BUILDER   *bld         = NULL;
  ESL_MSA      *msa         = NULL;
  ESL_MSA      *postmsa     = NULL;
  ESL_MSA     **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL;
  P7_HMM       *hmm         = NULL;
  P7_BG        *bg          = NULL;
  char         *wbuf        = NULL;	/* packed send/recv buffer  */
  void         *tmp;			/* for reallocation of wbuf */
  int           wn          = 0;	/* allocation size for wbuf */
  int           sz, n;		        /* size of a packed message */
  int           pos;
  char          errmsg[eslERRBUFSIZE];

  /* After master initialization: master broadcasts its status.
   */
  MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (xstatus != eslOK) return; /* master saw an error code; workers do an immediate normal shutdown. */
  ESL_DPRINTF2(("worker %d: sees that master has initialized\n", cfg->my_rank));
  
  /* Master now broadcasts worker initialization information (alphabet type) 
   * Workers returns their status post-initialization.
   * Initial allocation of wbuf must be large enough to guarantee that
   * we can pack an error result into it, because after initialization,
   * errors will be returned as packed (code, errmsg) messages.
   */
  MPI_Bcast(&type, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (xstatus == eslOK) { if ((cfg->abc = esl_alphabet_Create(type))      == NULL)    xstatus = eslEMEM; }
  if (xstatus == eslOK) { wn = 4096;  if ((wbuf = malloc(wn * sizeof(char))) == NULL) xstatus = eslEMEM; }
  if (xstatus == eslOK) { if ((bld = p7_builder_Create(go, cfg->abc))     == NULL)    xstatus = eslEMEM; }
  MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); /* everyone sends xstatus back to master */
  if (xstatus != eslOK) {
    if (wbuf != NULL) free(wbuf);
    if (bld  != NULL) p7_builder_Destroy(bld);
    return; /* shutdown; we passed the error back for the master to deal with. */
  }

  bg = p7_bg_Create(cfg->abc);

  ESL_DPRINTF2(("worker %d: initialized\n", cfg->my_rank));

                      /* source = 0 (master); tag = 0 */
  while (esl_msa_MPIRecv(0, 0, MPI_COMM_WORLD, cfg->abc, &wbuf, &wn, &msa) == eslOK) 
    {
      /* Build the HMM */
      ESL_DPRINTF2(("worker %d: has received MSA %s (%d columns, %d seqs)\n", cfg->my_rank, msa->name, msa->alen, msa->nseq));
      if ((status = p7_Builder(bld, msa, bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) { strcpy(errmsg, bld->errbuf); goto ERROR; }
      ESL_DPRINTF2(("worker %d: has produced an HMM %s\n", cfg->my_rank, hmm->name));

      /* Calculate upper bound on size of sending status, HMM, and optional postmsa; make sure wbuf can hold it. */
      n = 0;
      if (MPI_Pack_size(1,    MPI_INT, MPI_COMM_WORLD, &sz) != 0)     goto ERROR;   n += sz;
      if (p7_hmm_MPIPackSize( hmm,     MPI_COMM_WORLD, &sz) != eslOK) goto ERROR;   n += sz;
      if (esl_msa_MPIPackSize(postmsa, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR;   n += sz;
      if (n > wn) { ESL_RALLOC(wbuf, tmp, sizeof(char) * n); wn = n; }
      ESL_DPRINTF2(("worker %d: has calculated that HMM will pack into %d bytes\n", cfg->my_rank, n));

      /* Send status, HMM, and optional postmsa back to the master */
      pos = 0;
      if (MPI_Pack       (&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD) != 0)     goto ERROR;
      if (p7_hmm_MPIPack (hmm,                 wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR;
      if (esl_msa_MPIPack(postmsa,             wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR;
      MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
      ESL_DPRINTF2(("worker %d: has sent HMM to master in message of %d bytes\n", cfg->my_rank, pos));

      esl_msa_Destroy(msa);     msa     = NULL;
      esl_msa_Destroy(postmsa); postmsa = NULL;
      p7_hmm_Destroy(hmm);      hmm     = NULL;
    }

  if (wbuf != NULL) free(wbuf);
  p7_builder_Destroy(bld);
  return;

 ERROR:
  ESL_DPRINTF2(("worker %d: fails, is sending an error message, as follows:\n%s\n", cfg->my_rank, errmsg));
  pos = 0;
  MPI_Pack(&status, 1,                MPI_INT,  wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Pack(errmsg,  eslERRBUFSIZE,    MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
  if (wbuf != NULL) free(wbuf);
  if (msa  != NULL) esl_msa_Destroy(msa);
  if (hmm  != NULL) p7_hmm_Destroy(hmm);
  if (bld  != NULL) p7_builder_Destroy(bld);
  return;
}
/* Function:  main()
 * Synopsis:  Run set of queries against an FM
 * Purpose:   Read in a FM and a file of query sequences.
 *            For each query, find matching FM interval, then collect positions in
 *            the original text T for the corresponding occurrences. These positions
 *            are 0-based (so first character is position 0).
 */
int
main(int argc,  char *argv[]) 
{
  void* tmp; // used for RALLOC calls
  clock_t t1, t2;
  struct tms ts1, ts2;
  char *fname_fm      = NULL;
  char *fname_queries = NULL;
  FM_HIT *hits        = NULL;
  char *line          = NULL;
  int status        = eslOK;
  int hit_cnt       = 0;
  int hit_indiv_cnt = 0;
  int miss_cnt      = 0;
  int hit_num       = 0;
  int hit_num2       = 0;
  int hits_size     = 0;
  int i,j;
  int count_only    = 0;

  FM_INTERVAL interval;
  FM_DATA *fmsf = NULL;
  FM_DATA *fmsb = NULL;
  FILE* fp_fm   = NULL;
  FILE* fp      = NULL;
  FILE* out     = NULL;
  char *outname = NULL;

  ESL_GETOPTS     *go  = NULL;    /* command line processing                 */
  FM_CFG *cfg;
  FM_METADATA *meta;

  ESL_SQ       *tmpseq;  // used for sequence validation
  ESL_ALPHABET *abc = NULL;


  //start timer
  t1 = times(&ts1);

  process_commandline(argc, argv, &go, &fname_fm, &fname_queries);


  if (esl_opt_IsOn(go, "--out")) {
    outname = esl_opt_GetString(go, "--out");
    if ( esl_strcmp ("-", outname) == 0 ) {
      out = stdout;
      outname = "stdout";
    } else {
      out = fopen(outname,"w");
    }
  }

  if (esl_opt_IsOn(go, "--count_only"))
    count_only = 1;


  if((fp_fm = fopen(fname_fm, "rb")) == NULL)
    esl_fatal("Cannot open file `%s': ", fname_fm);


  fm_configAlloc(&cfg);
  cfg->occCallCnt = 0;
  meta = cfg->meta;
  meta->fp = fp_fm;


  fm_readFMmeta( meta);



  if      (meta->alph_type == fm_DNA)   abc     = esl_alphabet_Create(eslDNA);
  else if (meta->alph_type == fm_AMINO) abc     = esl_alphabet_Create(eslAMINO);
  tmpseq = esl_sq_CreateDigital(abc);



  //read in FM-index blocks
  ESL_ALLOC(fmsf, meta->block_count * sizeof(FM_DATA) );
  if (!meta->fwd_only)
    ESL_ALLOC(fmsb, meta->block_count * sizeof(FM_DATA) );

  for (i=0; i<meta->block_count; i++) {
    fm_FM_read( fmsf+i,meta, TRUE );

    if (!meta->fwd_only) {
      fm_FM_read(fmsb+i, meta, FALSE );
      fmsb[i].SA = fmsf[i].SA;
      fmsb[i].T = fmsf[i].T;
    }
  }
  fclose(fp_fm);

  output_header(meta, stdout, go, fname_fm, fname_queries);


  /* initialize a few global variables, then call initGlobals
   * to do architecture-specific initialization
   */
  fm_configInit(cfg, NULL);

  fm_alphabetCreate(meta, NULL); // don't override charBits

  fp = fopen(fname_queries,"r");
  if (fp == NULL)
    esl_fatal("Unable to open file %s\n", fname_queries);

  ESL_ALLOC(line, FM_MAX_LINE * sizeof(char));

  hits_size = 200;
  ESL_ALLOC(hits, hits_size * sizeof(FM_HIT));

  while(fgets(line, FM_MAX_LINE, fp) ) {
    int qlen=0;
    while (line[qlen] != '\0' && line[qlen] != '\n')  qlen++;
    if (line[qlen] == '\n')  line[qlen] = '\0';

    hit_num = 0;

    for (i=0; i<meta->block_count; i++) {

      fm_getSARangeReverse(fmsf+i, cfg, line, meta->inv_alph, &interval);
      if (interval.lower>=0 && interval.lower <= interval.upper) {
        int new_hit_num =  interval.upper - interval.lower + 1;
        hit_num += new_hit_num;
        if (!count_only) {
          if (hit_num > hits_size) {
            hits_size = 2*hit_num;
            ESL_RALLOC(hits, tmp, hits_size * sizeof(FM_HIT));
          }
          getFMHits(fmsf+i, cfg, &interval, i, hit_num-new_hit_num, qlen, hits, fm_forward);
        }

      }


      /* find reverse hits, using backward search on the forward FM*/
      if (!meta->fwd_only) {
        fm_getSARangeForward(fmsb+i, cfg, line, meta->inv_alph, &interval);// yes, use the backward fm to produce the equivalent of a forward search on the forward fm
        if (interval.lower>=0 && interval.lower <= interval.upper) {
          int new_hit_num =  interval.upper - interval.lower + 1;
          hit_num += new_hit_num;
          if (!count_only) {
            if (hit_num > hits_size) {
              hits_size = 2*hit_num;
              ESL_RALLOC(hits, tmp, hits_size * sizeof(FM_HIT));
            }
            //even though I used fmsb above, use fmsf here, since we'll now do a backward trace
            //in the FM-index to find the next sampled SA position
            getFMHits(fmsf+i, cfg, &interval, i, hit_num-new_hit_num, qlen, hits, fm_backward);
          }
        }

      }

    }


    if (hit_num > 0) {
      if (count_only) {
        hit_cnt++;
        hit_indiv_cnt += hit_num;
      } else {
        hit_num2 = 0;

        //for each hit, identify the sequence id and position within that sequence
        for (i = 0; i< hit_num; i++) {

          status = fm_getOriginalPosition (fmsf, meta, hits[i].block, hits[i].length, fm_forward, hits[i].start,  &(hits[i].block), &(hits[i].start) );
          hits[i].sortkey = (status==eslERANGE ? -1 : meta->seq_data[ hits[i].block ].target_id);

          //validate match - if any characters in orig sequence were ambiguities, reject
          fm_convertRange2DSQ( fmsf, meta, hits[i].start, hits[i].length, p7_NOCOMPLEMENT, tmpseq, TRUE );
          for (j=1; j<=hits[i].length; j++) {
            if (tmpseq->dsq[j] >= abc->K) {
              hits[i].sortkey = -1; //reject
              j = hits[i].length+1; //quit looking
            }
          }

          if (hits[i].sortkey != -1)
            hit_num2++; // legitimate hit

        }
        if (hit_num2 > 0)
          hit_cnt++;

        //now sort according the the sequence_id corresponding to that seq_offset
        qsort(hits, hit_num, sizeof(FM_HIT), hit_sorter);

        //skim past the skipped entries
        i = 0;
        while ( i < hit_num ) {
          if (hits[i].sortkey != -1 )
            break;  //
          i++;
        }


        if (i < hit_num) {
          if (out != NULL) {
            fprintf (out, "%s\n",line);
            //fprintf (out, "\t%10s (%8d %s)\n",meta->seq_data[ hits[i].block ].name, hits[i].start, (hits[i].direction==fm_forward?"+":"-"));
            fprintf (out, "    %8ld %s %10s\n", (long)(hits[i].start), (hits[i].direction==fm_forward?"f":"r"), meta->seq_data[ hits[i].block ].name);
          }
          hit_indiv_cnt++;
          i++; // skip the first one, since I'll be comparing each to the previous

          for (  ; i< hit_num; i++) {
            if ( //meta->seq_data[ hits[i].block ].id != meta->seq_data[ hits[i-1].block ].id ||
                 hits[i].sortkey   != hits[i-1].sortkey ||  //sortkey is seq_data[].id
                 hits[i].direction != hits[i-1].direction ||
                 hits[i].start     != hits[i-1].start )
            {
              if (out != NULL)
                //fprintf (out, "\t%10s (%8d %s)\n",meta->seq_data[ hits[i].block ].name, hits[i].start, (hits[i].direction==fm_forward?"+":"-"));
                fprintf (out, "    %8ld %s %10s\n", (long)(hits[i].start), (hits[i].direction==fm_forward?"f":"r"), meta->seq_data[ hits[i].block ].name);
              hit_indiv_cnt++;
            }
          }
          if (out != NULL)
            fprintf (out, "\n");
        }
      }
    } else {
      miss_cnt++;
    }


  }

  for (i=0; i<meta->block_count; i++) {
    fm_FM_destroy( fmsf+i, 1 );
    if (!meta->fwd_only)
      fm_FM_destroy( fmsb+i, 0 );
  }


  free (hits);
  free (line);
  fclose(fp);

  fm_configDestroy(cfg);


  // compute and print the elapsed time in millisec
  t2 = times(&ts2);
  {
    double clk_ticks = sysconf(_SC_CLK_TCK);
    double elapsedTime = (t2-t1)/clk_ticks;
    double throughput = cfg->occCallCnt/elapsedTime;

    fprintf (stderr, "hit: %-10d  (%d)\n", hit_cnt, hit_indiv_cnt);
    fprintf (stderr, "miss:%-10d\n", miss_cnt);
    fprintf (stderr, "run time:  %.2f seconds\n", elapsedTime);
    fprintf (stderr, "occ calls: %12s\n", commaprint(cfg->occCallCnt));
    fprintf (stderr, "occ/sec:   %12s\n", commaprint(throughput));
  }

  exit(eslOK);


ERROR:
  printf ("failure allocating memory for hits\n");
  exit(status);


}
/* Each test sequence will contain one or two domains, depending on whether --single is set.
 */
static int
synthesize_positives(ESL_GETOPTS *go, struct cfg_s *cfg, char *testname, ESL_STACK *teststack, int *ret_ntest)
{
  ESL_SQ *domain1, *domain2;
  ESL_SQ *sq;
  void   *p;
  int64_t L;			/* total length of synthetic test seq */
  int     d1n, d2n;		/* lengths of two domains             */
  int     L1,L2,L3;		/* lengths of three random regions    */
  int     i,j;
  int     ntest = 0;
  int     ndomains = ( (esl_opt_GetBoolean(go, "--single") == TRUE) ? 1 : 2);
  int     status;

  while (esl_stack_ObjectCount(teststack) >= ndomains)
    {
      ESL_RALLOC(cfg->test_lens, p, (cfg->ntest+1) * sizeof(struct testseq_s));

      /* Pop our one or two test domains off the stack */
      esl_stack_PPop(teststack, &p);   
      domain1 = p; 
      d1n     = domain1->n;

      if (ndomains == 2)
	{
	  esl_stack_PPop(teststack, &p); 
	  domain2 = p;
	  d2n = domain2->n;
	}
      else
	{
	  domain2 = NULL;
	  d2n     = 0;
	}

      /* Select a random total sequence length */
      if (d1n+d2n > cfg->db_maxL) esl_fatal("can't construct test seq; no db seq >= %d residues\n", d1n+d2n);
      do {                                                     
	if (esl_ssi_FindNumber(cfg->dbfp->data.ascii.ssi, esl_rnd_Roll(cfg->r, cfg->db_nseq), NULL, NULL, NULL, &L, NULL) != eslOK)
	  esl_fatal("failed to look up a random seq");
      } while (L < d1n+d2n);

      /* Now figure out the embedding */
      if (ndomains == 2) 
	{
	  /* Select random lengths of three flanking domains;
	   * Imagine picking two "insert after" points i,j in sequence 1..L', for
	   * L' = L-d1n-d2n (the total length of nonhomologous test seq)
	   */
	  do {
	    i = esl_rnd_Roll(cfg->r, L - d1n - d2n + 1 ); /* i = 0..L' */
	    j = esl_rnd_Roll(cfg->r, L - d1n - d2n + 1 ); /* j = 0..L' */
	  } while (i > j);

	  /* now 1           .. i         = random region 1 (if i==0, there's none); 
	   *     i+1         .. i+d1n     = domain 1
	   *     i+d1n+1     .. j+d1n     = random region 2 (if i==j, there's none);
	   *     j+d1n+1     .. j+d1n+d2n = domain 2
	   *     j+d1n+d2n+1 .. L         = random region 3 (if j == L-d1n-d2n, there's none);
	   */
	  L1 = i;			
	  L2 = j-i;
	  L3 = L - d1n - d2n - j;
	}
      else 
	{ /* embedding one domain */
	  i = esl_rnd_Roll(cfg->r, L - d1n + 1 ); /* i = 0..L' */
	  /* now 1           .. i         = random region 1 (if i==0, there's none); 
	   *     i+1         .. i+d1n     = domain 1
	   *     i+d1n+1     .. L         = random region 2 (if i==j, there's none);
	   */
	  L1 = i;			
	  L2 = L - d1n - L1;
	  L3 = 0;
	}
      
      sq = esl_sq_CreateDigital(cfg->abc);
      esl_sq_GrowTo(sq, L);
      sq->n = L;
      if (ndomains == 2) 
	{
	  esl_sq_FormatName(sq, "%s/%d/%d-%d/%d-%d", testname, cfg->ntest, i+1, i+d1n, j+d1n+1, j+d1n+d2n);
	  esl_sq_FormatDesc(sq, "domains: %s %s", domain1->name, domain2->name);
	}
      else
	{
	  esl_sq_FormatName(sq, "%s/%d/%d-%d",   testname, cfg->ntest, i+1, i+d1n);
	  esl_sq_FormatDesc(sq, "domain: %s", domain1->name);
	}

      fprintf(cfg->possummfp, "%-35s %5d %5d %5d %5d %5d %5d", sq->name, (int) sq->n, L1, d1n, L2, d2n, L3);


      sq->dsq[0] = sq->dsq[L+1] = eslDSQ_SENTINEL;
      set_random_segment(go, cfg, cfg->possummfp, sq->dsq+1,           L1);
      memcpy(sq->dsq+i+1,     domain1->dsq+1, sizeof(ESL_DSQ) * d1n);
      fprintf(cfg->possummfp, " %-24s %5d %5d", domain1->name, 1, d1n);
      set_random_segment(go, cfg, cfg->possummfp, sq->dsq+i+d1n+1,     L2);
      if (ndomains == 2) 
	{
	  memcpy(sq->dsq+j+d1n+1, domain2->dsq+1, sizeof(ESL_DSQ) * d2n);
	  fprintf(cfg->possummfp, " %-24s %5d %5d", domain2->name, 1, d2n);
	  set_random_segment(go, cfg, cfg->possummfp, sq->dsq+j+d1n+d2n+1, L3);
	}
      fprintf(cfg->possummfp, "\n");

      cfg->test_lens[cfg->ntest].L   = L;
      cfg->test_lens[cfg->ntest].L1  = L1;
      cfg->test_lens[cfg->ntest].d1n = d1n;
      cfg->test_lens[cfg->ntest].L2  = L2;
      cfg->test_lens[cfg->ntest].d2n = d2n;
      cfg->test_lens[cfg->ntest].L3  = L3;
      cfg->ntest++;
      ntest++;

      esl_sqio_Write(cfg->out_seqfp, sq, eslSQFILE_FASTA, FALSE);

      esl_sq_Destroy(domain1);
      if (ndomains == 2) esl_sq_Destroy(domain2);
      esl_sq_Destroy(sq);
    }

  *ret_ntest = ntest;
  return eslOK;

 ERROR:
  esl_fatal("Failure in synthesize_positives");
  return status;
}
/* glocal_rescore_isolated_domain()
 * EPN, Tue Oct  5 10:16:12 2010
 *
 * Based on p7_domaindef.c's rescore_isolated_domain(). Modified 
 * so that generic matrices (which can be used for glocally configured
 * models) can be used. This function finds a single glocal domain, not a 
 * single local one. 
 * 
 * Also modified to optionally remove the Backward and OA alignment.
 * The decision to do these is determined by three input parameters:
 * <null2_is_done>: TRUE if we've already computed the null2 scores for 
 *                  this region (see Sean's notes below).
 * <do_null2>:      TRUE if we will apply a null2 penalty eventually 
 *                  to this domain 
 * <do_aln>:        TRUE if we need the OA alignment
 *
 * Notes (verbatim) from p7_domaindef.c::rescore_isolated_domain():
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * SRE, Fri Feb  8 09:18:33 2008 [Janelia]
 *
 * We have isolated a single domain's envelope from <i>..<j> in
 * sequence <sq>, and now we want to score it in isolation and obtain
 * an alignment display for it.
 * 
 * (Later, we can add up all the individual domain scores from this
 * seq into a new per-seq score, to compare to the original per-seq
 * score).
 *  
 * The caller provides model <om> configured in unilocal mode; by
 * using unilocal (as opposed to multilocal), we're going to force the
 * identification of a single domain in this envelope now.
 * 
 * The alignment is an optimal accuracy alignment (sensu IH Holmes),
 * also obtained in unilocal mode.
 * 
 * The caller provides DP matrices <ox1> and <ox2> with sufficient
 * space to hold Forward and Backward calculations for this domain
 * against the model. (The caller will typically already have matrices
 * sufficient for the complete sequence lying around, and can just use
 * those.) The caller also provides a <P7_DOMAINDEF> object which is
 * (efficiently, we trust) managing any necessary temporary working
 * space and heuristic thresholds.
 * 
 * Returns <eslOK> if a domain was successfully identified, scored,
 * and aligned in the envelope; if so, the per-domain information is
 * registered in <ddef>, in <ddef->dcl>.
 * 
 * And here's what's happened to our working memory:
 * 
 * <ddef>: <ddef->tr> has been used, and possibly reallocated, for
 *         the OA trace of the domain. Before exit, we called
 *         <Reuse()> on it.
 * 
 * <ox1> : happens to be holding OA score matrix for the domain
 *         upon return, but that's not part of the spec; officially
 *         its contents are "undefined".
 *
 * <ox2> : happens to be holding a posterior probability matrix
 *         for the domain upon return, but we're not making that
 *         part of the spec, so caller shouldn't rely on this;
 *         spec just makes its contents "undefined".
 */
static int
glocal_rescore_isolated_domain(P7_DOMAINDEF *ddef, const P7_PROFILE *gm, const ESL_SQ *sq, 
			       P7_GMX *gx1, P7_GMX *gx2, int i, int j, int null2_is_done, 
			       int do_null2, int do_aln)
{
  P7_DOMAIN     *dom           = NULL;
  int            Ld            = j-i+1;
  float          domcorrection = 0.0;
  float          envsc, oasc;
  int            z;
  int            pos;
  float          null2[p7_MAXCODE];
  int            status;

  p7_GForward (sq->dsq + i-1, Ld, gm, gx1, &envsc);

  oasc = 0.;
  if(do_null2 || do_aln) { 
    p7_GBackward(sq->dsq + i-1, Ld, gm, gx2, NULL);
  
    status = p7_GDecoding(gm, gx1, gx2, gx2);      /* <ox2> is now overwritten with post probabilities     */
    if (status == eslERANGE) return eslFAIL;      /* rare: numeric overflow; domain is assumed to be repetitive garbage [J3/119-212] */
  
    /* Is null2 set already for this i..j? (It is, if we're in a domain that
     * was defined by stochastic traceback clustering in a multidomain region;
     * it isn't yet, if we're in a simple one-domain region). If it isn't,
     * do it now, by the expectation (posterior decoding) method.
     */
    if ((! null2_is_done) && do_null2) { 
      p7_GNull2_ByExpectation(gm, gx2, null2);
      for (pos = i; pos <= j; pos++) 
	ddef->n2sc[pos]  = logf(null2[sq->dsq[pos]]);
    }
    if(do_null2) { 
      for (pos = i; pos <= j; pos++) 
	domcorrection   += ddef->n2sc[pos];	        /* domcorrection is in units of NATS */
    }

    if(do_aln) { 
      /* Find an optimal accuracy alignment */
      p7_GOptimalAccuracy(gm, gx2, gx1, &oasc);      /* <ox1> is now overwritten with OA scores              */
      p7_GOATrace        (gm, gx2, gx1, ddef->tr);   /* <tr>'s seq coords are offset by i-1, rel to orig dsq */
      
      /* hack the trace's sq coords to be correct w.r.t. original dsq */
      for (z = 0; z < ddef->tr->N; z++)
	if (ddef->tr->i[z] > 0) ddef->tr->i[z] += i-1;
    }
    /* get ptr to next empty domain structure in domaindef's results */
  }
  if (ddef->ndom == ddef->nalloc) {
    void *p;
    ESL_RALLOC(ddef->dcl, p, sizeof(P7_DOMAIN) * (ddef->nalloc*2));
    ddef->nalloc *= 2;    
  }
  dom = &(ddef->dcl[ddef->ndom]);
  
  /* store the results in it */
  dom->ienv          = i;
  dom->jenv          = j;
  dom->envsc         = envsc;         /* in units of NATS */
  dom->domcorrection = domcorrection; /* in units of NATS, will be 0. if do_null2 == FALSE */
  dom->oasc          = oasc;	      /* in units of expected # of correctly aligned residues, will be 0. if do_aln == FALSE */
  dom->dombias       = 0.0;	/* gets set later, using bg->omega and dombias */
  dom->bitscore      = 0.0;	/* gets set later by caller, using envsc, null score, and dombias */
  dom->lnP           = 1.0;	/* gets set later by caller, using bitscore */
  dom->is_reported   = FALSE;	/* gets set later by caller */
  dom->is_included   = FALSE;	/* gets set later by caller */
  dom->ad            = NULL;
  dom->iali          = i;
  dom->jali          = j;

  ddef->ndom++;

  if(do_aln) { 
    p7_trace_Reuse(ddef->tr);
  }
  return eslOK;

 ERROR:
  if(do_aln) { 
    p7_trace_Reuse(ddef->tr);
  }
  return status;
}
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = NULL;	/* application configuration       */
  char         *hmmfile = NULL;	/* HMM file name                   */
  char         *seqfile = NULL; /* sequence file name              */
  char         *mapfile = NULL; /* optional mapped MSA file name   */
  int           infmt   = eslSQFILE_UNKNOWN;
  int           outfmt  = eslMSAFILE_STOCKHOLM;
  P7_HMMFILE   *hfp     = NULL;	/* open HMM file                   */
  ESL_SQFILE   *sqfp    = NULL;	/* open sequence file              */
  char         *outfile = NULL;	  /* output filename               */
  FILE         *ofp     = stdout; /* output stream                 */
  ESL_SQ      **sq      = NULL;	/* array of sequences              */
  void         *p       = NULL;	/* tmp ptr for reallocation        */
  int           nseq    = 0;	/* # of sequences in <seqfile>     */
  int           mapseq  = 0;	/* # of sequences in mapped MSA    */
  int           totseq  = 0;	/* # of seqs in all sources        */
  ESL_ALPHABET *abc     = NULL;	/* alphabet (set from the HMM file)*/
  P7_HMM       *hmm     = NULL;
  P7_TRACE    **tr      = NULL;	/* array of tracebacks             */
  ESL_MSA      *msa     = NULL;	/* resulting multiple alignment    */
  int           msaopts = 0;	/* flags to p7_tracealign_Seqs()   */
  int           idx;		/* counter over seqs, traces       */
  int           status;		/* easel/hmmer return code         */
  char          errbuf[eslERRBUFSIZE];

  /* Parse the command line
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n",       go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") )                   cmdline_help   (argv[0], go);
  if (esl_opt_ArgNumber(go) != 2)                      cmdline_failure(argv[0], "Incorrect number of command line arguments.\n");        

  hmmfile = esl_opt_GetArg(go, 1);
  seqfile = esl_opt_GetArg(go, 2);

  if (strcmp(hmmfile, "-") == 0 && strcmp(seqfile, "-") == 0) 
    cmdline_failure(argv[0], "Either <hmmfile> or <seqfile> may be '-' (to read from stdin), but not both.\n");

  msaopts |= p7_ALL_CONSENSUS_COLS; /* default as of 3.1 */
  if (esl_opt_GetBoolean(go, "--trim"))    msaopts |= p7_TRIM;

  /* If caller declared an input format, decode it 
   */
  if (esl_opt_IsOn(go, "--informat")) {
    infmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat"));
    if (infmt == eslSQFILE_UNKNOWN) cmdline_failure(argv[0], "%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--informat"));
  }

  /* Determine output alignment file format */
  outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat"));
  if (outfmt == eslMSAFILE_UNKNOWN)    cmdline_failure(argv[0], "%s is not a recognized output MSA file format\n", esl_opt_GetString(go, "--outformat"));

  /* Open output stream */
  if ( (outfile = esl_opt_GetString(go, "-o")) != NULL) 
  {
    if ((ofp = fopen(outfile, "w")) == NULL)
      cmdline_failure(argv[0], "failed to open -o output file %s for writing\n", outfile);
  }


  /* If caller forced an alphabet on us, create the one the caller wants  
   */
  if      (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO);
  else if (esl_opt_GetBoolean(go, "--dna"))   abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--rna"))   abc = esl_alphabet_Create(eslRNA);

  /* Read one HMM, and make sure there's only one.
   */
  status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",                       status, hmmfile, errbuf);  

  status = p7_hmmfile_Read(hfp, &abc, &hmm);
  if      (status == eslEFORMAT)   p7_Fail("Bad file format in HMM file %s:\n%s\n",          hfp->fname, hfp->errbuf);
  else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type));
  else if (status == eslEOF)       p7_Fail("Empty HMM file %s? No HMM data found.\n",        hfp->fname);
  else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s\n",     hfp->fname);

  status = p7_hmmfile_Read(hfp, &abc, NULL);
  if      (status != eslEOF)       p7_Fail("HMM file %s does not contain just one HMM\n",    hfp->fname);
  p7_hmmfile_Close(hfp);


  /* We're going to build up two arrays: sequences and traces.
   * If --mapali option is chosen, the first set of sequences/traces is from the provided alignment
   */
  if ( (mapfile = esl_opt_GetString(go, "--mapali")) != NULL)
  {
    map_alignment(mapfile, hmm, &sq, &tr, &mapseq);
  }
  totseq = mapseq;

  /* Read digital sequences into an array (possibly concat'ed onto mapped seqs)
   */
  status = esl_sqfile_OpenDigital(abc, seqfile, infmt, NULL, &sqfp);
  if      (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n",          seqfile);
  else if (status == eslEFORMAT)   p7_Fail("Sequence file %s is empty or misformatted\n",            seqfile);
  else if (status != eslOK)        p7_Fail("Unexpected error %d opening sequence file %s\n", status, seqfile);

  ESL_RALLOC(sq, p, sizeof(ESL_SQ *) * (totseq + 1));
  sq[totseq] = esl_sq_CreateDigital(abc);
  nseq = 0;
  while ((status = esl_sqio_Read(sqfp, sq[totseq+nseq])) == eslOK)
  {
    nseq++;
    ESL_RALLOC(sq, p, sizeof(ESL_SQ *) * (totseq+nseq+1));
    sq[totseq+nseq] = esl_sq_CreateDigital(abc);
  }
  if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", 
					   sqfp->filename, esl_sqfile_GetErrorBuf(sqfp));
  else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename);
  esl_sqfile_Close(sqfp);
  totseq += nseq;


  /* Remaining initializations, including trace array allocation
   */
  ESL_RALLOC(tr, p, sizeof(P7_TRACE *) * totseq);
  for (idx = mapseq; idx < totseq; idx++)
    tr[idx] = p7_trace_CreateWithPP();

  p7_tracealign_computeTraces(hmm, sq, mapseq, totseq - mapseq, tr);

  p7_tracealign_Seqs(sq, tr, totseq, hmm->M, msaopts, hmm, &msa);

  eslx_msafile_Write(ofp, msa, outfmt);

  for (idx = 0; idx <= totseq; idx++) esl_sq_Destroy(sq[idx]);    /* including sq[nseq] because we overallocated */
  for (idx = 0; idx <  totseq; idx++) p7_trace_Destroy(tr[idx]); 
  free(sq);
  free(tr);
  esl_msa_Destroy(msa);
  p7_hmm_Destroy(hmm);
  if (ofp != stdout) fclose(ofp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return eslOK;

 ERROR:
  return status;
}