Example #1
0
/* Function:  esl_workqueue_Reset()
 * Synopsis:  Reset the queue for another run.
 * Incept:    MSF, Thu Jun 18 11:51:39 2009
 *
 * Purpose:   Reset the queue for another run.  This is done by moving
 *            all the queued object to the reader's list (i.e. producer).
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslESYS> if thread synchronization fails somewhere.
 *            <eslEINVAL> if something's wrong with <queue>.
 */
int 
esl_workqueue_Reset(ESL_WORK_QUEUE *queue)
{
  int inx;
  int queueSize;

  if (queue == NULL)                                ESL_EXCEPTION(eslEINVAL, "Invalid queue object");
  if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS,   "mutex lock failed");

  queueSize = queue->queueSize;

  /* move all buffers back to the reader queue */
  while (queue->workerQueueCnt > 0) 
    {
      inx = (queue->readerQueueHead + queue->readerQueueCnt) % queueSize;
      queue->readerQueue[inx] = queue->workerQueue[queue->workerQueueHead];
      ++queue->readerQueueCnt;

      queue->workerQueue[queue->workerQueueHead] = NULL;
      queue->workerQueueHead = (queue->workerQueueHead + 1) % queueSize;
      --queue->workerQueueCnt;
    }

  queue->pendingWorkers = 0;

  if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");

  return eslOK;
}
Example #2
0
/* Function:  esl_rmx_ValidateQ()
 * Incept:    SRE, Sun Mar 11 10:30:50 2007 [Janelia]
 *
 * Purpose:   Validates an instantaneous rate matrix <Q> for a
 *            continuous-time Markov process, whose elements $q_{ij}$
 *            represent instantaneous transition rates $i \rightarrow
 *            j$. 
 *            
 *            Rows satisfy the condition that
 *            $q_{ii} = -\sum_{i \neq j} q_{ij}$, and also
 *            that $q_{ij} \geq 0$ for all $j \neq i$. 
 *            
 *            <tol> specifies the floating-point tolerance to which
 *            that condition must hold: <fabs(sum-q_ii) <= tol>.
 *            
 *            <errbuf> is an optional error message buffer. The caller
 *            may pass <NULL> or a pointer to a buffer of at least
 *            <eslERRBUFSIZE> characters.
 *            
 * Args:      Q      - rate matrix to validate
 *            tol    - floating-point tolerance (0.00001, for example)      
 *            errbuf - OPTIONAL: ptr to an error buffer of at least
 *                     <eslERRBUFSIZE> characters.
 *
 * Returns:   <eslOK> on successful validation. 
 *            <eslFAIL> on failure, and if a non-<NULL> <errbuf> was
 *            provided by the caller, a message describing
 *            the reason for the failure is put there.
 *
 * Throws:    (no abnormal error conditions)
 */
int
esl_rmx_ValidateQ(ESL_DMATRIX *Q, double tol, char *errbuf)
{
  int    i,j;
  double qi;

  if (Q->type != eslGENERAL) ESL_EXCEPTION(eslEINVAL, "Q must be type eslGENERAL to be validated");
  if (Q->n    != Q->m)       ESL_EXCEPTION(eslEINVAL, "a rate matrix Q must be square");

  for (i = 0; i < Q->n; i++)
    {
      qi = 0.;
      for (j = 0; j < Q->m; j++)
	{
	  if (i != j) {
	    if (Q->mx[i][j] < 0.)       ESL_FAIL(eslFAIL, errbuf, "offdiag elem %d,%d < 0",i,j);
	    qi += Q->mx[i][j];
	  } else {
	    if (Q->mx[i][j] > 0.)       ESL_FAIL(eslFAIL, errbuf, "diag elem %d,%d < 0", i,j);
	  }
	}
      if (fabs(qi + Q->mx[i][i]) > tol) ESL_FAIL(eslFAIL, errbuf, "row %d does not sum to 0.0", i);
    }
  return eslOK;
}
Example #3
0
/* Function:  esl_dmatrix_Copy()
 *
 * Purpose:   Copies <src> matrix into <dest> matrix. <dest> must
 *            be allocated already by the caller.
 * 
 *            You may copy to a matrix of a different type, so long as
 *            the copy makes sense. If <dest> matrix is a packed type
 *            and <src> is not, the values that should be zeros must
 *            be zero in <src>, else the routine throws
 *            <eslEINCOMPAT>. If the <src> matrix is a packed type and
 *            <dest> is not, the values that are implicitly zeros are
 *            set to zeros in the <dest> matrix.
 *            
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINCOMPAT> if <src>, <dest> are different sizes,
 *            or if their types differ and <dest> cannot represent
 *            <src>.
 */
int
esl_dmatrix_Copy(const ESL_DMATRIX *src, ESL_DMATRIX *dest)
{
  int i,j;

  if (dest->n != src->n || dest->m != src->m)
    ESL_EXCEPTION(eslEINCOMPAT, "matrices of different size");

  if (src->type == dest->type)   /* simple case. */
    memcpy(dest->mx[0], src->mx[0], src->ncells * sizeof(double));

  else if (src->type == eslGENERAL && dest->type == eslUPPER)		
    {
      for (i = 1; i < src->n; i++)
	for (j = 0; j < i; j++)
	  if (src->mx[i][j] != 0.) 
	    ESL_EXCEPTION(eslEINCOMPAT, "general matrix isn't upper triangular, can't be copied/packed");
      for (i = 0; i < src->n; i++)
	for (j = i; j < src->m; j++)
	  dest->mx[i][j] = src->mx[i][j];
    }
  
  else if (src->type == eslUPPER && dest->type == eslGENERAL)		
    {
      for (i = 1; i < src->n; i++)
	for (j = 0; j < i; j++)
	  dest->mx[i][j] = 0.;
      for (i = 0; i < src->n; i++)
	for (j = i; j < src->m; j++)
	  dest->mx[i][j] = src->mx[i][j];      
    }

  return eslOK;
}
Example #4
0
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths;
 * instead we use the fact that E(i) is itself the necessary normalization
 * factor, and implement FChoose's algorithm here for an on-the-fly 
 * calculation.
 */
static inline int
select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int    Q     = p7O_NQF(ox->M);
  float  sum   = 0.0;
  float  roll  = esl_random(rng);
  float  norm  = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E]; /* all M, D already scaled exactly the same */
  __m128 xEv   = _mm_set1_ps(norm);
  union { __m128 v; float p[4]; } u;
  int    q,r;

  while (1) {
    for (q = 0; q < Q; q++)
      {
	u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_M], xEv);
	for (r = 0; r < 4; r++) {
	  sum += u.p[r];
	  if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M;}
	}

	u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_D], xEv);
	for (r = 0; r < 4; r++) {
	  sum += u.p[r];
	  if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D;}
	}
      }
    if (sum < 0.99) 
      ESL_EXCEPTION(-1, "Probabilities weren't normalized - failed to trace back from an E");
  }
  /*UNREACHED*/
  ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
} 
Example #5
0
/* Function:  esl_recorder_ResizeTo()
 * Synopsis:  Reallocate an <ESL_RECORDER> for a new <maxlines>
 * Incept:    SRE, Fri Dec 25 17:02:46 2009 [Casa de Gatos]
 *
 * Purpose:   Reallocate the <ESL_RECORDER> <rc> to have a new
 *            window size <maxlines>. 
 *            
 *            The new <maxlines> may be more or less than the previous
 *            window size for <rc>.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> if (re-)allocation fails.
 *
 *            <eslEINVAL> if the recorder has a marked line (for start
 *            of a block) and you try to shrink it so much that that
 *            marked line would be lost.
 *
 *            <eslEINCONCEIVABLE> on any baseline resetting problem;
 *            this would have to be an internal error in the module.
 * 
 * Note:      We may have to repermute the line array, and reset its
 *            baseline, as follows.
 *            
 *            In the growth case: if the line array is out of order
 *            (circularly permuted) we must straighten it out, which
 *            means resetting the baseline.  
 *            i.e. to grow 3 1 2 to nalloc=6, we need 1 2 3 x x x; 
 *            simple reallocation to 3 1 2 x x x doesn't work,
 *            next read would make 3 4 2 x x x.
 *          
 *            In the shrinkage case: if the line array is in use beyond the
 *            new array size, we set a new baseline to keep as much of the
 *            old array as possible.
 * 
 *            i.e. for 6->3  
 *            1 2 3 x x x -> 1 2 3
 *            1 2 3 4 x x -> 2 3 4 with new baseline=2.
 *            4 5 0 1 2 3 -> 3 4 5 with new baseline=3
 */
int
esl_recorder_ResizeTo(ESL_RECORDER *rc, int new_maxlines)
{
  int   idx;
  int   newbase;
  void *tmp;
  int   minlines;
  int   status;

  if (new_maxlines == rc->nalloc) return eslOK;

  if (new_maxlines > rc->nalloc) /* growth case */
    {
      if ((rc->nread - rc->baseline) / rc->nalloc != 0)	/* array is permuted; reorder it */
	{
	  newbase = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
	  status = recorder_new_baseline(rc, newbase);
	  if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
	}
    }
  else 				/* shrinkage case */
    {
      /* check that the marked line (if any) will stay in window */
      if (rc->markline >= 0)	
	{
	  minlines = rc->nread - rc->markline;
	  if (new_maxlines < minlines)
	    ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing marked line");
	}
      /* check that current line will stay in window */
      minlines = rc->nread - rc->ncurr + 1;
      if (new_maxlines < minlines)
	ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing current line");

      if (rc->nread - rc->baseline > new_maxlines) /* baseline needs to move up */
	{
	  newbase = rc->nread - new_maxlines;
	  status = recorder_new_baseline(rc, newbase);
	  if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
	}

      for (idx = new_maxlines; idx < rc->nalloc; idx++)
	if (rc->line[idx]) free(rc->line[idx]);
    }

  ESL_RALLOC(rc->line,   tmp, sizeof(char *) * new_maxlines);
  ESL_RALLOC(rc->lalloc, tmp, sizeof(int)    * new_maxlines);
  ESL_RALLOC(rc->offset, tmp, sizeof(off_t)  * new_maxlines);
  for (idx = rc->nalloc; idx < new_maxlines; idx++) /* no-op in shrinkage case */
    { 
      rc->line[idx]   = NULL;
      rc->lalloc[idx] = 0;
      rc->offset[idx] = 0;
    }
  rc->nalloc = new_maxlines;
  return eslOK;

 ERROR:
  return status;
}
Example #6
0
/* Function:  p7_hmm_mpi_Send()
 * Synopsis:  Send an HMM as an MPI work unit.
 *
 * Purpose:   Sends an HMM <hmm> as a work unit to MPI process
 *            <dest> (where <dest> ranges from 0..<nproc-1>), tagged
 *            with MPI tag <tag>, for MPI communicator <comm>, as 
 *            the sole workunit or result. 
 *            
 *            Work units are prefixed by a status code indicating the
 *            number of HMMs sent. If <hmm> is <NULL>, this code is 0,
 *            and <_Recv()> interprets such a unit as an EOD
 *            (end-of-data) signal, a signal to cleanly shut down
 *            worker processes.
 *            
 *            In order to minimize alloc/free cycles in this routine,
 *            caller passes a pointer to a working buffer <*buf> of
 *            size <*nalloc> characters. If necessary (i.e. if <hmm> is
 *            too big to fit), <*buf> will be reallocated and <*nalloc>
 *            increased to the new size. As a special case, if <*buf>
 *            is <NULL> and <*nalloc> is 0, the buffer will be
 *            allocated appropriately, but the caller is still
 *            responsible for free'ing it.
 *            
 * Returns:   <eslOK> on success; <*buf> may have been reallocated and
 *            <*nalloc> may have been increased.
 * 
 * Throws:    <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc
 *            fails. In either case, <*buf> and <*nalloc> remain valid and useful
 *            memory (though the contents of <*buf> are undefined). 
 * 
 * Note:      Compare to p7_hmmfile_WriteBinary(). The two operations (sending
 *            an HMM via MPI, or saving it as a binary file to disk) are
 *            similar.
 */
int
p7_hmm_mpi_Send(const P7_HMM *hmm, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc)
{
  int   n = 0;
  int   code;
  int   sz, pos;
  int   status;

  /* Figure out size. We always send at least a status code (0=EOD=nothing sent) */
  if ( MPI_Pack_size(1, MPI_INT, comm, &sz)          != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack size failed");  n += sz;
  if ((status = p7_hmm_mpi_PackSize(hmm, comm, &sz)) != eslOK)       return status;                                   n += sz;

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc) 
    {
      ESL_REALLOC(*buf, sizeof(char) * n);
      *nalloc = n; 
    }

  /* Pack the status code and HMM into the buffer */
  /* The status code is the # of HMMs being sent as one MPI message; here 1 or 0 */
  pos  = 0;
  code = (hmm ? 1 : 0);
  if (MPI_Pack(&code, 1, MPI_INT,           *buf, n, &pos, comm)  != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack failed");
  if (hmm && (status = p7_hmm_mpi_Pack(hmm, *buf, n, &pos, comm)) != eslOK)       return status;
  
  /* Send the packed HMM to the destination. */
  if (MPI_Send(*buf, n, MPI_PACKED, dest, tag, comm) != MPI_SUCCESS)  ESL_EXCEPTION(eslESYS, "mpi send failed");
  return eslOK;

 ERROR:
  return status;
}
Example #7
0
/* Function:  p7_hmm_mpi_Recv()
 * Synopsis:  Receives an HMM as a work unit from an MPI sender.
 *
 * Purpose:   Receive a work unit that consists of a single HMM
 *            sent by MPI <source> (<0..nproc-1>, or
 *            <MPI_ANY_SOURCE>) tagged as <tag> for MPI communicator <comm>.
 *            
 *            Work units are prefixed by a status code that gives the
 *            number of HMMs to follow; here, 0 or 1 (but in the future,
 *            we could easily extend to sending several HMMs in one 
 *            packed buffer). If we receive a 1 code and we successfully
 *            unpack an HMM, this routine will return <eslOK> and a non-<NULL> <*ret_hmm>.
 *            If we receive a 0 code (a shutdown signal), 
 *            this routine returns <eslEOD> and <*ret_hmm> is <NULL>.
 *   
 *            Caller provides a working buffer <*buf> of size
 *            <*nalloc> characters. These are passed by reference, so
 *            that <*buf> can be reallocated and <*nalloc> increased
 *            if necessary. As a special case, if <*buf> is <NULL> and
 *            <*nalloc> is 0, the buffer will be allocated
 *            appropriately, but the caller is still responsible for
 *            free'ing it.
 *            
 *            Caller may or may not already know what alphabet the HMM
 *            is expected to be in.  A reference to the current
 *            alphabet is passed in <byp_abc>. If the alphabet is unknown,
 *            pass <*byp_abc = NULL>, and when the HMM is received, an
 *            appropriate new alphabet object is allocated and passed
 *            back to the caller via <*abc>.  If the alphabet is
 *            already known, <*byp_abc> is that alphabet, and the new
 *            HMM's alphabet type is verified to agree with it. This
 *            mechanism allows an application to let the first HMM
 *            determine the alphabet type for the application, while
 *            still keeping the alphabet under the application's scope
 *            of control.
 *
 * Args:      source  - index of MPI sender, 0..nproc-1 (0=master), or MPI_ANY_SOURCE
 *            tag     - MPI message tag;  MPI_ANY_TAG, or a specific message tag (0..32767 will work on any MPI)
 *            comm    - MPI communicator; MPI_COMM_WORLD, or a specific MPI communicator
 *            buf     - working buffer (for receiving packed message);
 *                      if <*buf> == NULL, a <*buf> is allocated and returned;
 *                      if <*buf> != NULL, it is used (and may be reallocated)
 *            nalloc  - allocation size of <*buf> in bytes; pass 0 if <*buf==NULL>.           
 *            byp_abc - BYPASS: <*byp_abc> == ESL_ALPHABET *> if known;
 *                              <*byp_abc> == NULL> if alphabet unknown.
 *            ret_hmm  - RETURN: newly allocated/received profile
 *
 * Returns:   <eslOK> on success. <*ret_hmm> contains the received HMM;
 *            it is allocated here, and the caller is responsible for
 *            free'ing it.  <*buf> may have been reallocated to a
 *            larger size, and <*nalloc> may have been increased.  If
 *            <*abc> was passed as <NULL>, it now points to an
 *            <ESL_ALPHABET> object that was allocated here; caller is
 *            responsible for free'ing this.
 *            
 *            Returns <eslEOD> if an end-of-data signal was received.
 *            In this case, <*buf>, <*nalloc>, and <*abc> are left unchanged,
 *            and <*ret_hmm> is <NULL>.
 *            
 *            Returns <eslEINCOMPAT> if the HMM is in a different alphabet
 *            than <*abc> said to expect. In this case, <*abc> is unchanged,
 *            <*buf> and <*nalloc> may have been changed, and <*ret_hmm> is
 *            <NULL>.
 *            
 * Throws:    <eslEMEM> on allocation error, and <eslESYS> on MPI communication
 *            errors; in either case <*ret_hmm> is <NULL>.           
 */
int
p7_hmm_mpi_Recv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **byp_abc, P7_HMM **ret_hmm)
{
  int         pos = 0;
  int         code;
  int         n;
  MPI_Status  mpistatus;
  int         status;

  /* Probe first, because we need to know if our buffer is big enough. */
  if ( MPI_Probe(source, tag, comm, &mpistatus)  != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi probe failed");
  if ( MPI_Get_count(&mpistatus, MPI_PACKED, &n) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi get count failed");

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc) 
    {
      ESL_REALLOC(*buf, sizeof(char) * n);
      *nalloc = n; 
    }

  /* Receive the entire packed work unit */
  if (MPI_Recv(*buf, n, MPI_PACKED, source, tag, comm, &mpistatus) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi recv failed");

  /* Unpack the status code prefix */
  if (MPI_Unpack(*buf, n, &pos, &code, 1, MPI_INT, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi unpack failed");

  if      (code == 0) { status = eslEOD; *ret_hmm = NULL; }
  else if (code == 1)   status = p7_hmm_mpi_Unpack(*buf, *nalloc, &pos, comm, byp_abc, ret_hmm);
  else                  ESL_EXCEPTION(eslESYS, "bad mpi buffer transmission code");
  return status;

 ERROR: /* from ESL_REALLOC only */
  *ret_hmm = NULL;
  return status;
}
Example #8
0
/* tau_by_moments()
 * 
 * Obtain an initial estimate for tau by 
 * matching moments. Also returns mean and
 * logsum, which we need for ML fitting.
 * To obtain a lambda estimate, use
 * lambda = tau / mean.
 */
static int
tau_by_moments(double *x, int n, double mu, double *ret_tau, double *ret_mean, double *ret_logsum)
{
  int    i;
  double mean, var, logsum;

  mean = var = logsum = 0.;
  for (i = 0; i < n; i++)
    {
      if (x[i] < mu) ESL_EXCEPTION(eslEINVAL, "No x[i] can be < mu in gamma data");
      mean   += x[i] - mu;	   /* mean is temporarily just the sum */
      logsum += log(x[i] - mu);
      var  += (x[i]-mu)*(x[i]-mu); /* var is temporarily the sum of squares */
    }
  var     = (var - mean*mean/(double)n) / ((double)n-1); /* now var is the variance */
  mean   /= (double) n;		/* and now mean is the mean */
  logsum /= (double) n;

  if (var == 0.)		/* and if mean = 0, var = 0 anyway. */
    ESL_EXCEPTION(eslEINVAL, "Zero variance in allegedly gamma-distributed dataset");
  
  if (ret_tau    != NULL) *ret_tau    = mean * mean / var;
  if (ret_mean   != NULL) *ret_mean   = mean;
  if (ret_logsum != NULL) *ret_logsum = logsum;
  return eslOK;
}
Example #9
0
/* Function:  esl_workqueue_Remove()
 * Synopsis:  Removes a queued object from the producers list.
 * Incept:    MSF, Thu Jun 18 11:51:39 2009
 *
 * Purpose:   Removes a queued object from the producers list.
 *
 *            A object <void> that has already been consumed by a worker
 *            is removed the the producers list.  If there are no empty
 *            objects, a <obj> is set to NULL.
 *
 *            The pointer to the object is returned in the obj arguement.
 *
 * Returns:   <eslOK>  on success.
 *            <eslEOD> if no objects are in the queue.
 *
 * Throws:    <eslESYS> if thread synchronization fails somewhere.
 *            <eslEINVAL> if something's wrong with <queue>.
 */
int 
esl_workqueue_Remove(ESL_WORK_QUEUE *queue, void **obj)
{
  int inx;
  int status = eslEOD;

  if (obj == NULL)   ESL_EXCEPTION(eslEINVAL, "Invalid object pointer");
  if (queue == NULL) ESL_EXCEPTION(eslEINVAL, "Invalid queue object");

  if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed");

  /* check if there are any items on the readers list */
  *obj = NULL;
  if (queue->readerQueueCnt > 0)
    {
      inx = (queue->readerQueueHead + queue->readerQueueCnt) % queue->queueSize;
      *obj = queue->readerQueue[inx];
      queue->readerQueue[inx] = NULL;
      --queue->readerQueueCnt;
      status = eslOK;
    }

  if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");

  return status;
}
Example #10
0
/* Function:  esl_msashuffle_Shuffle()
 * Synopsis:  Shuffle an alignment's columns.
 *
 * Purpose:   Returns a column-shuffled version of <msa> in <shuf>,
 *            using random generator <r>. Shuffling by columns
 *            preserves the \% identity of the original
 *            alignment. <msa> and <shuf> can be identical, to shuffle
 *            in place.
 *            
 *            The caller sets up the rest of the data (everything but
 *            the alignment itself) in <shuf> the way it wants,
 *            including sequence names, MSA name, and other
 *            annotation. The easy thing to do is to make <shuf>
 *            a copy of <msa>: the caller might create <shuf> by
 *            a call to <esl_msa_Clone()>.
 *            
 *            The alignments <msa> and <shuf> can both be in digital
 *            mode, or can both be in text mode; you cannot mix
 *            digital and text modes.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <msa>,<shuf> aren't in the same mode (digital vs. text).
 */
int
esl_msashuffle_Shuffle(ESL_RANDOMNESS *r, ESL_MSA *msa, ESL_MSA *shuf)
{
  int i, pos, alen;

  if (! (msa->flags & eslMSA_DIGITAL))
    {
      char c;
      if (shuf->flags & eslMSA_DIGITAL) ESL_EXCEPTION(eslEINVAL, "<shuf> must be in text mode if <msa> is");
      if (msa != shuf) {
	for (i = 0; i < msa->nseq; i++)
	  strcpy(shuf->aseq[i], msa->aseq[i]);
      }

      for (i = 0; i < msa->nseq; i++)
	shuf->aseq[i][msa->alen] = '\0';

      for (alen = msa->alen; alen > 1; alen--)
	{
	  pos = esl_rnd_Roll(r, alen);
	  for (i = 0; i < msa->nseq; i++)
	    {
	      c                     = msa->aseq[i][pos];
	      shuf->aseq[i][pos]    = shuf->aseq[i][alen-1];
	      shuf->aseq[i][alen-1] = c;
	    }
	}
    }
#ifdef eslAUGMENT_ALPHABET
  else 
    {
      ESL_DSQ x;
      if (! (shuf->flags & eslMSA_DIGITAL)) ESL_EXCEPTION(eslEINVAL, "<shuf> must be in digital mode if <msa> is");

      if (msa != shuf) {
	for (i = 0; i < msa->nseq; i++)
	  memcpy(shuf->ax[i], msa->ax[i], (msa->alen + 2) * sizeof(ESL_DSQ));
      }

      for (i = 0; i < msa->nseq; i++)
	shuf->ax[i][msa->alen+1] = eslDSQ_SENTINEL;

      for (alen = msa->alen; alen > 1; alen--)
	{
	  pos = esl_rnd_Roll(r, alen) + 1;
	  for (i = 0; i < msa->nseq; i++)
	    {
	      x                 = msa->ax[i][pos];
	      shuf->ax[i][pos]  = shuf->ax[i][alen];
	      shuf->ax[i][alen] = x;
	    }
	}
    }
#endif /*eslAUGMENT_ALPHABET*/

  return eslOK;
}
Example #11
0
/* Function:  esl_hyperexp_Write()
 *
 * Purpose:   Write hyperexponential parameters from <hxp> to an open <fp>.
 *            
 *            The output format is suitable for input by <esl_hyperexp_Read()>.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEWRITE> on any write error.
 */
int
esl_hyperexp_Write(FILE *fp, ESL_HYPEREXP *hxp)
{
  int k;

  if (fprintf(fp, "%8d     # number of components\n", hxp->K)     < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
  if (fprintf(fp, "%8.2f   # mu (for all components)\n", hxp->mu) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
  for (k = 0; k < hxp->K; k++)
    if (fprintf(fp, "%8.6f %12.6f  # q[%d], lambda[%d]\n",
		hxp->q[k], hxp->lambda[k], k, k)                  < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
  return eslOK;
}
Example #12
0
/* Function:  p7_Forward()
 * Synopsis:  The Forward algorithm, full matrix fill version.
 * Incept:    SRE, Fri Aug 15 18:59:43 2008 [Casa de Gatos]
 *
 * Purpose:   Calculates the Forward algorithm for sequence <dsq> of
 *            length <L> residues, using optimized profile <om>, and a
 *            preallocated DP matrix <ox>. Upon successful return, <ox>
 *            contains the filled Forward matrix, and <*opt_sc>
 *            optionally contains the raw Forward score in nats.
 *            
 *            This calculation requires $O(ML)$ memory and time.
 *            The caller must provide a suitably allocated full <ox>
 *            by calling <ox = p7_omx_Create(M, L, L)> or
 *            <p7_omx_GrowTo(ox, M, L, L)>.
 *
 *            The model <om> must be configured in local alignment
 *            mode. The sparse rescaling method used to keep
 *            probability values within single-precision floating
 *            point dynamic range cannot be safely applied to models in
 *            glocal or global mode.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues          
 *            om      - optimized profile
 *            ox      - RETURN: Forward DP matrix
 *            opt_sc  - RETURN: Forward score (in nats)          
 *
 * Returns:   <eslOK> on success. 
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if the profile
 *            isn't in local alignment mode.
 *            <eslERANGE> if the score exceeds the limited range of
 *            a probability-space odds ratio.
 *            In either case, <*opt_sc> is undefined.
 */
int
p7_Forward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
{
#ifdef p7_DEBUGGING		
  if (om->M >  ox->allocQ4*4)    ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
  if (L     >= ox->validR)       ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
  if (L     >= ox->allocXR)      ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
  if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
#endif

  return forward_engine(TRUE, dsq, L, om, ox, opt_sc);
}
Example #13
0
/* Function:  p7_oprofile_ReadRest()
 * Synopsis:  Read the rest of an optimized profile.
 *
 * Purpose:   Read the rest of an optimized profile <om> from
 *            the <.h3p> file associated with an open HMM
 *            file <hfp>. 
 *            
 *            This is the second part of a two-part calling sequence.
 *            The <om> here must be the result of a previous
 *            successful <p7_oprofile_ReadMSV()> call on the same
 *            open <hfp>.
 *
 * Args:      hfp - open HMM file, from which we've previously
 *                  called <p7_oprofile_ReadMSV()>.
 *            om  - optimized profile that was successfully
 *                  returned by  <p7_oprofile_ReadMSV()>.
 *
 * Returns:   <eslOK> on success, and <om> is now a complete
 *            optimized profile.
 *            
 *            Returns <eslEFORMAT> if <hfp> has no <.h3p> file open,
 *            or on any parsing error, and set <hfp->errbuf> to
 *            an informative error message.
 *
 * Throws:    <eslESYS> if an <fseek()> fails to reposition the
 *            binary <.h3p> file.
 *            
 *            <eslEMEM> on allocation error.
 */
int
p7_oprofile_ReadRest(P7_HMMFILE *hfp, P7_OPROFILE *om)
{
  uint32_t       magic;
  int            M;
  int            alphatype;
  int            status = eslOK;

#ifdef HMMER_THREADS
  /* lock the mutex to prevent other threads from reading from the optimized
   * profile at the same time.
   */
  if (hfp->syncRead)
    {
      if (pthread_mutex_lock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed");
    }
#endif

  if (! fread( (char *) &magic,          sizeof(uint32_t), 1,           hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read magic");
  if (magic == v3a_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/a); please hmmpress your HMM file again");
  if (magic == v3b_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/b); please hmmpress your HMM file again");
  if (magic == v3c_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/c); please hmmpress your HMM file again");
  if (magic == v3d_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/d); please hmmpress your HMM file again");
  if (magic == v3e_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/e); please hmmpress your HMM file again");
  if (magic != v3f_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad magic; not an HMM database file?");

  if (! fread( (char *) &M,              sizeof(int),      1,           hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read model size M");
  if (! fread( (char *) &alphatype,      sizeof(int),      1,           hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read alphabet type");  
  if (! fread( (char *) &magic,          sizeof(uint32_t), 1,           hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "no sentinel magic: .h3p file corrupted?");

  if (magic     != v3f_pmagic)     ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad sentinel magic: .h3p file corrupted?");
  if (M         != om->M)          ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f model length mismatch");
  if (alphatype != om->abc->type)  ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f alphabet type mismatch");

#ifdef HMMER_THREADS
  if (hfp->syncRead)
    {
      if (pthread_mutex_unlock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");
    }
#endif

  return eslOK;

 ERROR:
#ifdef HMMER_THREADS
  if (hfp->syncRead)
    {
      if (pthread_mutex_unlock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");
    }
#endif

  return status;
}
Example #14
0
/* Function:  p7_oprofile_Position()
 * Synopsis:  Reposition an open hmm file to an offset.
 * Incept:    MSF, Thu Oct 15, 2009 [Janelia]
 *
 * Purpose:   Reposition an open <hfp> to offset <offset>.
 *            <offset> would usually be the first byte of a
 *            desired hmm record.
 *            
 * Returns:   <eslOK>     on success;
 *            <eslEOF>    if no data can be read from this position.
 *
 * Throws:    <eslEINVAL>  if the <sqfp> is not positionable.
 *            <eslEFORMAT> if no msv profile opened.
 *            <eslESYS>    if the fseeko() call fails.
 */
int
p7_oprofile_Position(P7_HMMFILE *hfp, off_t offset)
{
  if (hfp->ffp == NULL)  ESL_EXCEPTION(eslEFORMAT, hfp->errbuf, "no MSV profile file; hmmpress probably wasn't run");
  if (hfp->do_stdin)     ESL_EXCEPTION(eslEINVAL, "can't Position() in standard input");
  if (hfp->do_gzip)      ESL_EXCEPTION(eslEINVAL, "can't Position() in a gzipped file");
  if (offset < 0)        ESL_EXCEPTION(eslEINVAL, "bad offset");

  if (fseeko(hfp->ffp, offset, SEEK_SET) != 0) ESL_EXCEPTION(eslESYS, "fseeko() failed");

  return eslOK;
}
Example #15
0
/* Function:  p7_BackwardParser()
 * Synopsis:  The Backward algorithm, linear memory parsing version.
 * Incept:    SRE, Sat Aug 16 08:34:13 2008 [Janelia]
 *
 * Purpose:   Same as <p7_Backward()> except that the full matrix isn't
 *            kept. Instead, a linear $O(M+L)$ memory algorithm is
 *            used, keeping only the DP matrix values for the special
 *            (BENCJ) states. These are sufficient to do posterior
 *            decoding to identify high-probability regions where
 *            domains are.
 *       
 *            The caller must provide a suitably allocated "parsing"
 *            <bck> by calling <bck = p7_omx_Create(M, 0, L)> or
 *            <p7_omx_GrowTo(bck, M, 0, L)>.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues          
 *            om      - optimized profile
 *            fwd     - filled Forward DP matrix, for scale factors
 *            bck     - RETURN: filled Backward matrix
 *            opt_sc  - optRETURN: Backward score (in nats)          
 *
 * Returns:   <eslOK> on success. 
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if the profile
 *            isn't in local alignment mode.
 *            <eslERANGE> if the score exceeds the limited range of
 *            a probability-space odds ratio.
 *            In either case, <*opt_sc> is undefined.
 */
int 
p7_BackwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
{
#ifdef p7_DEBUGGING		
  if (om->M >  bck->allocQ4*4)    ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
  if (bck->validR < 1)            ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
  if (L     >= bck->allocXR)      ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
  if (L     != fwd->L)            ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
  if (! p7_oprofile_IsLocal(om))  ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
#endif

  return backward_engine(FALSE, dsq, L, om, fwd, bck, opt_sc);
}
Example #16
0
/* Function:  esl_workqueue_Complete()
 * Synopsis:  Signals the end of the queue.
 * Incept:    MSF, Thu Jun 18 11:51:39 2009
 *
 * Purpose:   Signal the end of the queue.  If there are any threads
 *            waiting on an object, signal them to wake up and complete
 *            their processing.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslESYS> if thread synchronization fails somewhere.
 *            <eslEINVAL> if something's wrong with <queue>.
 */
int 
esl_workqueue_Complete(ESL_WORK_QUEUE *queue)
{
  if (queue == NULL)                                ESL_EXCEPTION(eslEINVAL, "Invalid queue object");
  if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS,   "mutex lock failed");

  if (queue->pendingWorkers != 0)
    {
      if (pthread_cond_broadcast (&queue->workerQueueCond) != 0) ESL_EXCEPTION(eslESYS, "broadcast failed");
    }

  if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");

  return eslOK;
}
Example #17
0
/* Function:  p7_emit_FancyConsensus()
 * Synopsis:  Emit a fancier consensus with upper/lower case and N/X's.
 * Incept:    SRE, Fri May 14 09:33:10 2010 [Janelia]
 *
 * Purpose:   Generate a consensus sequence for model <hmm>, consisting
 *            of the maximum probability residue in each match state;
 *            store this sequence in text-mode <sq> provided by the caller.
 *            
 *            If the probability of the consensus residue is less than
 *            <min_lower>, show an ``any'' residue (N or X) instead.
 *            If the probability of the consensus residue is $\geq$
 *            <min_lower>  and less than <min_upper>, show the residue
 *            as lower case; if it is $\geq$ <min_upper>, show it as
 *            upper case.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if the <sq> isn't in text mode.
 *
 * Xref:      SRE:J6/59.
 */
int
p7_emit_FancyConsensus(const P7_HMM *hmm, float min_lower, float min_upper, ESL_SQ *sq)
{
  int   k, x;
  float p;
  char  c;
  int   status;

  if (! esl_sq_IsText(sq)) ESL_EXCEPTION(eslEINVAL, "p7_emit_FancyConsensus() expects a text-mode <sq>");

  if ((status = esl_sq_GrowTo(sq, hmm->M)) != eslOK) return status;

  for (k = 1; k <= hmm->M; k++)
  {

    if (hmm->mm && hmm->mm[k] == 'm') { //masked position, spit out the degenerate code
      if ((status = esl_sq_CAddResidue(sq, tolower(esl_abc_CGetUnknown(hmm->abc))) ) != eslOK) return status;
    } else {
      p = esl_vec_FMax(   hmm->mat[k], hmm->abc->K);
      x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K);
  
      if      (p <  min_lower)  c = tolower(esl_abc_CGetUnknown(hmm->abc));
      else if (p >= min_upper)  c = toupper(hmm->abc->sym[x]);
      else                      c = tolower(hmm->abc->sym[x]);

      if ((status = esl_sq_CAddResidue(sq, c)) != eslOK) return status;
    }
  }
  if ((status = esl_sq_CAddResidue(sq, '\0')) != eslOK) return status;
  return eslOK;
}
Example #18
0
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths;
 * instead we use the fact that E(i) is itself the necessary normalization
 * factor, and implement FChoose's algorithm here for an on-the-fly 
 * calculation.
 */
static inline int
select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int          Q     = p7O_NQF(ox->M);
  double       sum   = 0.0;
  double       roll  = esl_random(rng);
  double       norm  = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E];   /* all M, D already scaled exactly the same */
  vector float xEv   = esl_vmx_set_float(norm);
  vector float zerov = (vector float) vec_splat_u32(0);
  union { vector float v; float p[4]; } u;
  int    q,r;

  while (1) {
    for (q = 0; q < Q; q++)
      {
	u.v = vec_madd(ox->dpf[i][q*3 + p7X_M], xEv, zerov);
	for (r = 0; r < 4; r++) {
	  sum += u.p[r];
	  if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M;}
	}

	u.v = vec_madd(ox->dpf[i][q*3 + p7X_D], xEv, zerov);
	for (r = 0; r < 4; r++) {
	  sum += u.p[r];
	  if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D;}
	}
      }
    ESL_DASSERT1(sum > 0.99);
  }
  /*UNREACHED*/
  ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
} 
Example #19
0
/* Function:  esl_stats_Psi()
 * Synopsis:  Calculates $\Psi(x)$ (the digamma function).
 * Incept:    SRE, Tue Nov 15 13:57:59 2005 [St. Louis]
 *
 * Purpose:   Computes $\Psi(x)$ (the "digamma" function), which is
 *            the derivative of log of the Gamma function:
 *            $d/dx \log \Gamma(x) = \frac{\Gamma'(x)}{\Gamma(x)} = \Psi(x)$.
 *            Argument $x$ is $> 0$. 
 * 
 *            This is J.M. Bernardo's "Algorithm AS103",
 *            Appl. Stat. 25:315-317 (1976).  
 */
int
esl_stats_Psi(double x, double *ret_answer)
{
  double answer = 0.;
  double x2;

  if (x <= 0.0) ESL_EXCEPTION(eslERANGE, "invalid x <= 0 in esl_stats_Psi()");
  
  /* For small x, Psi(x) ~= -0.5772 - 1/x + O(x), we're done.
   */
  if (x <= 1e-5) {
    *ret_answer = -eslCONST_EULER - 1./x;
    return eslOK;
  }

  /* For medium x, use Psi(1+x) = \Psi(x) + 1/x to c.o.v. x,
   * big enough for Stirling approximation to work...
   */
  while (x < 8.5) {
    answer = answer - 1./x;
    x += 1.;
  }
  
  /* For large X, use Stirling approximation
   */
  x2 = 1./x;
  answer += log(x) - 0.5 * x2;
  x2 = x2*x2;
  answer -= (1./12.)*x2;
  answer += (1./120.)*x2*x2;
  answer -= (1./252.)*x2*x2*x2;

  *ret_answer = answer;
  return eslOK;
}
Example #20
0
/* Function:  esl_exp_FitCompleteBinned()
* Incept:    SRE, Sun Aug 21 13:07:22 2005 [St. Louis]
*
* Purpose:   Fit a complete exponential distribution to the observed
*            binned data in a histogram <g>, where each
*            bin i holds some number of observed samples x with values from 
*            lower bound l to upper bound u (that is, $l < x \leq u$);
*            find maximum likelihood parameters $\mu,\lambda$ and 
*            return them in <*ret_mu>, <*ret_lambda>.
*            
*            If the binned data in <g> were set to focus on 
*            a tail by virtual censoring, the "complete" exponential is 
*            fitted to this tail. The caller then also needs to
*            remember what fraction of the probability mass was in this
*            tail.
*            
*            The ML estimate for $mu$ is the smallest observed
*            sample.  For complete data, <ret_mu> is generally set to
*            the smallest observed sample value, except in the
*            special case of a "rounded" complete dataset, where
*            <ret_mu> is set to the lower bound of the smallest
*            occupied bin. For tails, <ret_mu> is set to the cutoff
*            threshold <phi>, where we are guaranteed that <phi> is
*            at the lower bound of a bin (by how the histogram
*            object sets tails). 
*
*            The ML estimate for <ret_lambda> has an analytical 
*            solution, so this routine is fast. 
*            
*            If all the data are in one bin, the ML estimate of
*            $\lambda$ will be $\infty$. This is mathematically correct,
*            but is probably a situation the caller wants to avoid, perhaps
*            by choosing smaller bins.
*
*            This function currently cannot fit an exponential tail
*            to truly censored, binned data, because it assumes that
*            all bins have equal width, but in true censored data, the
*            lower cutoff <phi> may fall anywhere in the first bin.
*
* Returns:   <eslOK> on success.
*
* Throws:    <eslEINVAL> if dataset is true-censored.
*/
int
esl_exp_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda)
{
    int    i;
    double ai, bi, delta;
    double sa, sb;
    double mu = 0.;

	if (g->dataset_is == ESL_HISTOGRAM::COMPLETE)
    {
        if   (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin);
        else                 mu = g->xmin;
    }
    else if (g->dataset_is == ESL_HISTOGRAM::VIRTUAL_CENSORED) /* i.e., we'll fit to tail */
        mu = g->phi;
    else if (g->dataset_is == ESL_HISTOGRAM::TRUE_CENSORED)
        ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset");

    delta = g->w;
    sa = sb = 0.;
    for (i = g->cmin; i <= g->imax; i++) /* for each occupied bin */
    {
        if (g->obs[i] == 0) continue;
        ai = esl_histogram_Bin2LBound(g,i);
        bi = esl_histogram_Bin2UBound(g,i);
        sa += g->obs[i] * (ai-mu);
        sb += g->obs[i] * (bi-mu);
    }
    *ret_mu     = mu;
    *ret_lambda = 1/delta * (log(sb) - log(sa));
    return eslOK;
}
Example #21
0
/* Function:  esl_rmx_E2Q()
 * Incept:    SRE, Tue Jul 13 15:52:41 2004 [St. Louis]
 *
 * Purpose:   Given a lower triangular matrix ($j<i$) of 
 *            residue exchangeabilities <E>, and a stationary residue
 *            frequency vector <pi>; assuming $E_{ij} = E_{ji}$;
 *            calculates a rate matrix <Q> as
 *            
 *            $Q_{ij} = E_{ij} * \pi_j$
 *            
 *            The resulting <Q> is not normalized to any particular
 *            number of substitutions/site/time unit. See
 *            <esl_rmx_ScaleTo()> for that.
 *            
 * Args:      E     - symmetric residue "exchangeabilities";
 *                    only lower triangular entries are used.
 *            pi    - residue frequencies at stationarity. 
 *            Q     - RETURN: rate matrix, square (NxN). 
 *                    Caller allocates the memory for this.
 *                    
 * Returns:   <eslOK> on success; Q is calculated and filled in.
 * 
 * Xref:      STL8/p56.
 */
int
esl_rmx_E2Q(ESL_DMATRIX *E, double *pi, ESL_DMATRIX *Q)
{
  int          i,j;

  if (E->n != Q->n) ESL_EXCEPTION(eslEINVAL, "E and Q sizes differ");

  /* Scale all off-diagonals to pi[j] * E[i][j].
   */
  for (i = 0; i < E->n; i++)
    for (j = 0; j < i; j++)	/* only look at lower triangle of E. */
      {
	Q->mx[i][j] = pi[j] * E->mx[i][j]; 
	Q->mx[j][i] = pi[i] * E->mx[i][j];
      }

  /* Set diagonal to  -\sum of all j != i.
   */
  for (i = 0; i < Q->n; i++)
    {
      Q->mx[i][i] = 0.;		/* makes the vector sum work for j != i */
      Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], Q->n);
    }
  return eslOK;
}
int
profillic_eslx_msafile_Read(ESLX_MSAFILE *afp, ESL_MSA **ret_msa, ProfileType * profile_ptr)
{
  ESL_MSA  *msa    = NULL;
  int       status = eslOK;
#ifdef eslAUGMENT_SSI
  esl_pos_t offset = esl_buffer_GetOffset(afp->bf);
#endif

  switch (afp->format) {
  case eslMSAFILE_A2M:          if ((status = esl_msafile_a2m_Read      (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_AFA:          if ((status = esl_msafile_afa_Read      (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_CLUSTAL:      if ((status = esl_msafile_clustal_Read  (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_CLUSTALLIKE:  if ((status = esl_msafile_clustal_Read  (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_PFAM:         if ((status = esl_msafile_stockholm_Read(afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_PHYLIP:       if ((status = esl_msafile_phylip_Read   (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_PHYLIPS:      if ((status = esl_msafile_phylip_Read   (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_PSIBLAST:     if ((status = esl_msafile_psiblast_Read (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_SELEX:        if ((status = esl_msafile_selex_Read    (afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_STOCKHOLM:    if ((status = esl_msafile_stockholm_Read(afp, &msa)) != eslOK) goto ERROR; break;
  case eslMSAFILE_PROFILLIC:    if ((status = profillic_esl_msafile_profile_Read(afp, &msa, profile_ptr)) != eslOK) goto ERROR; break;
  default:                      ESL_EXCEPTION(eslEINCONCEIVABLE, "no such msa file format"); break;
  }
  
#ifdef eslAUGMENT_SSI
  msa->offset = offset;
#endif
  *ret_msa = msa;
  return eslOK;

 ERROR:
  if (msa) esl_msa_Destroy(msa);
  *ret_msa = NULL;
  return status;
}
/* Function:  esl_randomness_Init()
 * Synopsis:  Reinitialize an RNG.           
 * Incept:    SRE, Wed Jul 14 13:13:05 2004 [St. Louis]
 *
 * Purpose:   Reset and reinitialize an existing <ESL_RANDOMNESS>
 *            object. 
 *            
 *            (Not generally recommended. This does not make a
 *            sequence of numbers more random, and may make it less
 *            so.)
 *
 * Args:      r     - randomness object
 *            seed  - new seed to use; >0.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if seed is $<= 0$.
 *
 * Xref:      STL8/p57.
 */
int
esl_randomness_Init(ESL_RANDOMNESS *r, long seed)
{
  int burnin = 7;
  if (seed <= 0) ESL_EXCEPTION(eslEINVAL, "bad seed");
  r->seed = seed;
  while (burnin--) esl_random(r);
  return eslOK;
}
Example #24
0
/* Function:  esl_recorder_MarkBlock()
 * Synopsis:  Mark first line to be saved in a block.
 * Incept:    SRE, Fri Jan  1 11:13:53 2010 [Magallon]
 *
 * Purpose:   Mark line number <markline> (0..N-1) in a file being read
 *            through the <ESL_RECORDER> <rc> as the first line in a
 *            block of lines to be parsed later, when the end of
 *            the block is found. 
 *            
 *            This mark makes sure that the <ESL_RECORDER> will keep
 *            the entire block of lines in memory, starting at or
 *            before the mark. When a mark is active,
 *            <esl_recorder_Read()> will reallocate and grow the
 *            recorder as necessary, rather than overwriting the mark.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if the <markline> has already passed out
 *            of the recorder's memory.
 */
int
esl_recorder_MarkBlock(ESL_RECORDER *rc, int markline)
{
  int line0 = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
  
  if (markline < line0) ESL_EXCEPTION(eslEINVAL, "recorder window already passed marked line");
  rc->markline = markline;
  return eslOK;
}
Example #25
0
/* Function:  esl_rmx_SetWAG()
 * Incept:    SRE, Thu Mar  8 18:00:00 2007 [Janelia]
 *
 * Purpose:   Sets a $20 \times 20$ rate matrix <Q> to WAG parameters.
 *            The caller allocated <Q>.
 *
 *            If <pi> is non-<NULL>, it provides a vector of 20 amino
 *            acid stationary probabilities in Easel alphabetic order,
 *            A..Y, and the WAG stationary probabilities are set to
 *            these desired $\pi_i$. If <pi> is <NULL>, the default
 *            WAG stationary probabilities are used.
 *            
 *            The WAG parameters are a maximum likelihood
 *            parameterization obtained by Whelan and Goldman
 *            \citep{WhelanGoldman01}.
 *            
 * Note:      The data table was reformatted from wag.dat by the UTILITY1
 *            executable in the paml module. The wag.dat file was obtained from
 *            \url{http://www.ebi.ac.uk/goldman/WAG/wag.dat}. A copy
 *            is in formats/wag.dat.
 *
 * Args:      Q   - a 20x20 rate matrix to set, allocated by caller.
 *            pi  - desired stationary probabilities A..Y, or
 *                  NULL to use WAG defaults.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEINVAL> if <Q> isn't a 20x20 general matrix; and
 *            the state of <Q> is undefined.
 */
int
esl_rmx_SetWAG(ESL_DMATRIX *Q, double *pi)
{
  static double wagE[190] = {
    1.027040, 0.738998, 0.030295, 1.582850, 0.021352, 6.174160, 0.210494, 0.398020, 0.046730, 0.081134,
    1.416720, 0.306674, 0.865584, 0.567717, 0.049931, 0.316954, 0.248972, 0.930676, 0.570025, 0.679371,
    0.249410, 0.193335, 0.170135, 0.039437, 0.127395, 1.059470, 0.030450, 0.138190, 0.906265, 0.074034,
    0.479855, 2.584430, 0.088836, 0.373558, 0.890432, 0.323832, 0.397915, 0.384287, 0.084805, 0.154263,
    2.115170, 0.061304, 0.499462, 3.170970, 0.257555, 0.893496, 0.390482, 0.103754, 0.315124, 1.190630,
    0.174100, 0.404141, 4.257460, 0.934276, 4.854020, 0.509848, 0.265256, 5.429420, 0.947198, 0.096162,
    1.125560, 3.956290, 0.554236, 3.012010, 0.131528, 0.198221, 1.438550, 0.109404, 0.423984, 0.682355,
    0.161444, 0.243570, 0.696198, 0.099929, 0.556896, 0.415844, 0.171329, 0.195081, 0.908598, 0.098818,
    0.616783, 5.469470, 0.099921, 0.330052, 4.294110, 0.113917, 3.894900, 0.869489, 1.545260, 1.543640,
    0.933372, 0.551571, 0.528191, 0.147304, 0.439157, 0.102711, 0.584665, 2.137150, 0.186979, 5.351420,
    0.497671, 0.683162, 0.635346, 0.679489, 3.035500, 3.370790, 1.407660, 1.071760, 0.704939, 0.545931,
    1.341820, 0.740169, 0.319440, 0.967130, 0.344739, 0.493905, 3.974230, 1.613280, 1.028870, 1.224190,
    2.121110, 0.512984, 0.374866, 0.822765, 0.171903, 0.225833, 0.473307, 1.458160, 1.386980, 0.326622,
    1.516120, 2.030060, 0.795384, 0.857928, 0.554413, 4.378020, 2.006010, 1.002140, 0.152335, 0.588731,
    0.649892, 0.187247, 0.118358, 7.821300, 0.305434, 1.800340, 2.058450, 0.196246, 0.314887, 0.301281,
    0.251849, 0.232739, 1.388230, 0.113133, 0.717070, 0.129767, 0.156557, 1.529640, 0.336983, 0.262569,
    0.212483, 0.137505, 0.665309, 0.515706, 0.071917, 0.139405, 0.215737, 1.163920, 0.523742, 0.110864,
    0.365369, 0.240735, 0.543833, 0.325711, 0.196303, 6.454280, 0.103604, 3.873440, 0.420170, 0.133264,
    0.398618, 0.428437, 1.086000, 0.216046, 0.227710, 0.381533, 0.786993, 0.291148, 0.314730, 2.485390};
  static double wagpi[20];
  int i,j,z;
  
  if (Q->m != 20 || Q->n != 20 || Q->type != eslGENERAL)
    ESL_EXCEPTION(eslEINVAL, "Q must be a 20x20 general matrix");
  esl_composition_WAG(wagpi);

  /* 1. Transfer the wag E lower triagonal matrix directly into Q. */
  z = 0;
  for (i = 0; i < 20; i++)
    {
      Q->mx[i][i] = 0.; /* code below depends on this zero initialization */
      for (j = 0; j < i; j++) {
	Q->mx[i][j] = wagE[z++];
	Q->mx[j][i] = Q->mx[i][j];
      }
    }

  /* 2. Set offdiagonals Q_ij = E_ij * pi_j */
  for (i = 0; i < 20; i++)
    for (j = 0; j < 20; j++)
      if (pi != NULL) Q->mx[i][j] *= pi[j];
      else            Q->mx[i][j] *= wagpi[j];

  /* 3. Set diagonal Q_ii to -\sum_{i \neq j} Q_ij */
  for (i = 0; i < 20; i++)
    Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 20);
  
  /* 4. Renormalize matrix to units of 1 substitution/site. */
  if (pi != NULL) esl_rmx_ScaleTo(Q, pi,    1.0);
  else            esl_rmx_ScaleTo(Q, wagpi, 1.0);

  return eslOK;
}
Example #26
0
/* Function:  esl_msashuffle_Bootstrap()
 * Synopsis:  Bootstrap sample an MSA.
 * Incept:    SRE, Tue Jan 22 11:05:07 2008 [Janelia]
 *
 * Purpose:   Takes a bootstrap sample of <msa> (sample <alen> columns,
 *            with replacement) and puts it in <bootsample>, using
 *            random generator <r>. 
 *            
 *            The caller provides allocated space for <bootsample>.
 *            It must be different space than <msa>; you cannot take
 *            a bootstrap sample "in place". The caller sets up the
 *            rest of the data in <bootsample> (everything but the
 *            alignment itself) the way it wants, including sequence
 *            names, MSA name, and other annotation. The easy thing to
 *            do is to initialize <bootsample> by cloning <msa>.
 *
 *            The alignments <msa> and <bootsample> can both be in digital
 *            mode, or can both be in text mode; you cannot mix
 *            digital and text modes.
 *
 * Returns:   <eslOK> on success, and the alignment in <bootsample> is
 *            set to be a bootstrap resample of the alignment in <msa>.
 *
 * Throws:    <eslEINVAL> if <msa>,<bootsample> aren't in the same mode
 *            (digital vs. text).
 */
int 
esl_msashuffle_Bootstrap(ESL_RANDOMNESS *r, ESL_MSA *msa, ESL_MSA *bootsample)
{
  int i, pos, col;

  /* contract checks */
  if (  (msa->flags & eslMSA_DIGITAL) && ! (bootsample->flags & eslMSA_DIGITAL))
    ESL_EXCEPTION(eslEINVAL, "<msa> and <bootsample> must both be in digital or text mode");
  if (! (msa->flags & eslMSA_DIGITAL) &&   (bootsample->flags & eslMSA_DIGITAL))
    ESL_EXCEPTION(eslEINVAL, "<msa> and <bootsample> must both be in digital or text mode");

  if (! (msa->flags & eslMSA_DIGITAL))
    {
      for (pos = 0; pos < msa->alen; pos++)
	{
	  col = esl_rnd_Roll(r, msa->alen);
	  for (i = 0; i < msa->nseq; i++)
	    bootsample->aseq[i][pos] = msa->aseq[i][col];
	}

      for (i = 0; i < msa->nseq; i++)
	bootsample->aseq[i][msa->alen] = '\0';
    }
#ifdef eslAUGMENT_ALPHABET
  else
    {
      for (i = 0; i < msa->nseq; i++)
	bootsample->ax[i][0] = eslDSQ_SENTINEL;

      for (pos = 1; pos <= msa->alen; pos++)
	{
	  col = esl_rnd_Roll(r, msa->alen) + 1;
	  for (i = 0; i < msa->nseq; i++)
	    bootsample->ax[i][pos] = msa->ax[i][col];
	}

      for (i = 0; i < msa->nseq; i++)
	bootsample->ax[i][msa->alen+1] = eslDSQ_SENTINEL;
    }
#endif /*eslAUGMENT_ALPHABET*/

  return eslOK;
}
Example #27
0
/* Function:  p7_GOATrace()
 * Synopsis:  Optimal accuracy decoding: traceback.
 * Incept:    SRE, Fri Feb 29 12:59:11 2008 [Janelia]
 *
 * Purpose:   The traceback stage of the optimal accuracy decoding algorithm
 *            \citep{Kall05}.
 *            
 *            Caller provides the OA DP matrix <gx> that was just
 *            calculated by <p7_GOptimalAccuracy()>, as well as the
 *            posterior decoding matrix <pp>, which was calculated by
 *            Forward/Backward on a target sequence of length <L>
 *            using the query model <gm>.
 *            
 *            Caller provides an empty traceback structure <tr> to
 *            hold the result, allocated to hold optional posterior
 *            probability annotation on residues (with
 *            <p7_trace_CreateWithPP()>, generally).  This will be
 *            internally reallocated as needed for larger traces.
 *
 * Args:      gm    - query profile      
 *            pp    - posterior decoding matrix created by <p7_PosteriorDecoding()>
 *            gx    - OA DP matrix calculated by  <p7_OptimalAccuracyDP()>
 *            tr    - RESULT: OA traceback, allocated with posterior probs
 *
 * Returns:   <eslOK> on success, and <tr> contains the OA traceback.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
p7_GOATrace(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, P7_TRACE *tr)
{
  int           i   = gx->L;	/* position in seq (1..L)         */
  int           k   = 0;	/* position in model (1..M)       */
  float        postprob;
  int          sprv, scur;
  int          status;

#ifdef p7_DEBUGGING
  if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace isn't empty: forgot to Reuse()?");
#endif

  if ((status = p7_trace_AppendWithPP(tr, p7T_T, k, i, 0.0)) != eslOK) return status;
  if ((status = p7_trace_AppendWithPP(tr, p7T_C, k, i, 0.0)) != eslOK) return status;

  sprv = p7T_C;
  while (sprv != p7T_S) 
    {
      switch (sprv) {
      case p7T_M: scur = select_m(gm,     gx, i, k);  k--; i--; break;
      case p7T_D: scur = select_d(gm,     gx, i, k);  k--;      break;
      case p7T_I: scur = select_i(gm,     gx, i, k);       i--; break;
      case p7T_N: scur = select_n(i);                           break;
      case p7T_C: scur = select_c(gm, pp, gx, i);               break;
      case p7T_J: scur = select_j(gm, pp, gx, i);               break;
      case p7T_E: scur = select_e(gm,     gx, i, &k);           break;
      case p7T_B: scur = select_b(gm,     gx, i);               break;
      default: ESL_EXCEPTION(eslEINVAL, "bogus state in traceback");
      }
      if (scur == -1) ESL_EXCEPTION(eslEINVAL, "OA traceback choice failed");

      postprob = get_postprob(pp, scur, sprv, k, i);
      if ((status = p7_trace_AppendWithPP(tr, scur, k, i, postprob)) != eslOK) return status;

      /* For NCJ, we had to defer i decrement. */
      if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;
      sprv = scur;
    }
  tr->M = gm->M;
  tr->L = gx->L;
  return p7_trace_Reverse(tr);
}
Example #28
0
/* Function:  esl_workqueue_Dump()
 * Synopsis:  Print the contents of the queues.
 * Incept:    MSF, Thu Jun 18 11:51:39 2009
 *
 * Purpose:   Print the contents of the queues and their pointers.
 *
 * Returns:   <eslOK> on success.
 */
int esl_workqueue_Dump(ESL_WORK_QUEUE *queue)
{
  int i;

  if (queue == NULL)                                ESL_EXCEPTION(eslEINVAL, "Invalid queue object");
  if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS,   "mutex lock failed");

  printf ("Reader head: %2d  count: %2d\n", queue->readerQueueHead, queue->readerQueueCnt);
  printf ("Worker head: %2d  count: %2d\n", queue->workerQueueHead, queue->workerQueueCnt);
  for (i = 0; i < queue->queueSize; ++i)
    {
      printf ("  %2d:  %p  %p\n", i, queue->readerQueue[i], queue->workerQueue[i]);
    }
  printf ("Pending: %2d\n\n", queue->pendingWorkers);

  if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed");

  return eslOK;
}
Example #29
0
int
p7_sparsemask_StartRow_avx512(P7_SPARSEMASK *sm, int i)
{
 #ifdef HAVE_AVX512 
  int r;
  int status;
  
#ifdef p7_DEBUGGING
  if (i < 1 || i > sm->L) ESL_EXCEPTION(eslEINVAL, "i is 1..L: sequence position");
  if (sm->last_i <= i)    ESL_EXCEPTION(eslEINVAL, "rows need to be added in reverse order L..1");
#endif

  /* Make sure kmem has enough memory; if not, double it.
   * Because we know the original allocation was enough to hold
   * the slots, we know that doubling (even if ncells has filled
   * the current kalloc) is sufficient.
   */
  if (sm->ncells_AVX_512 + p7_VNF_AVX_512*sm->Q_AVX_512 > sm->kalloc_AVX_512)
    {
      int64_t kalloc_req = sm->kalloc_AVX_512 * 2;
      ESL_REALLOC(sm->kmem_AVX_512, sizeof(int) * kalloc_req);
      sm->kalloc_AVX_512 = kalloc_req;
      sm->n_krealloc++;
    }
  
  for (r = 0; r < p7_VNF_AVX_512; r++)
    {
      sm->s_AVX_512[p7_VNF_AVX_512-r-1] = sm->kmem_AVX_512 + sm->ncells_AVX_512 + r*sm->Q_AVX_512;
      sm->sn_AVX_512[r]         = 0;
    }
  sm->last_i_AVX_512 = i;
  for (r = 0; r < p7_VNF_AVX_512; r++) 
    sm->last_k_AVX_512[r] = sm->M+1;    /* sentinel to be sure that Add() is called in reverse order M..1 */
  return eslOK;
  
 ERROR:
  return status;
  #endif //HAVE_AVX512
  #ifndef HAVE_AVX512
  return eslENORESULT;
  #endif
}
Example #30
0
/* Function:  p7_StochasticTrace()
 * Synopsis:  Sample a traceback from a Forward matrix
 * Incept:    SRE, Fri Aug  8 17:40:18 2008 [UA217, IAD-SFO]
 *
 * Purpose:   Perform a stochastic traceback from Forward matrix <ox>,
 *            using random number generator <r>, in order to sample an
 *            alignment of model <om> to digital sequence <dsq> of
 *            length <L>. 
 *            
 *            The sampled traceback is returned in <tr>, which the
 *            caller provides with at least an initial allocation;
 *            the <tr> allocation will be grown as needed here.
 *
 * Args:      r   - source of random numbers
 *            dsq - digital sequence being aligned, 1..L
 *            L   - length of dsq
 *            om  - profile
 *            ox  - Forward matrix to trace, LxM
 *            tr  - storage for the recovered traceback
 *
 * Returns:   <eslOK> on success
 *
 * Throws:    <eslEMEM> on allocation error.
 *            <eslEINVAL> on several types of problems, including:
 *            the trace isn't empty (wasn't Reuse()'d);
 */
int
p7_StochasticTrace(ESL_RANDOMNESS *rng, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *ox,
		   P7_TRACE *tr)
{
  int   i;			/* position in sequence 1..L */
  int   k;			/* position in model 1..M */
  int   s0, s1;			/* choice of a state */
  int   status;			
  
  if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace not empty; needs to be Reuse()'d?");

  i = L;			
  k = 0;
  if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) return status;
  if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) return status;
  s0 = tr->st[tr->N-1];
  while (s0 != p7T_S)
    {
      switch (s0) {
      case p7T_M: s1 = select_m(rng, om, ox, i, k);  k--; i--; break;
      case p7T_D: s1 = select_d(rng, om, ox, i, k);  k--;      break;
      case p7T_I: s1 = select_i(rng, om, ox, i, k);       i--; break;
      case p7T_N: s1 = select_n(i);                            break;
      case p7T_C: s1 = select_c(rng, om, ox, i);               break;
      case p7T_J: s1 = select_j(rng, om, ox, i);               break;
      case p7T_E: s1 = select_e(rng, om, ox, i, &k);           break;
      case p7T_B: s1 = select_b(rng, om, ox, i);               break;
      default: ESL_EXCEPTION(eslEINVAL, "bogus state in traceback");
      }
      if (s1 == -1) ESL_EXCEPTION(eslEINVAL, "Stochastic traceback choice failed");

      if ((status = p7_trace_Append(tr, s1, k, i)) != eslOK) return status;

      if ( (s1 == p7T_N || s1 == p7T_J || s1 == p7T_C) && s1 == s0) i--;
      s0 = s1;
    } /* end traceback, at S state */

  tr->M = om->M;
  tr->L = L;
  return p7_trace_Reverse(tr);
}