/* Function: esl_workqueue_Reset() * Synopsis: Reset the queue for another run. * Incept: MSF, Thu Jun 18 11:51:39 2009 * * Purpose: Reset the queue for another run. This is done by moving * all the queued object to the reader's list (i.e. producer). * * Returns: <eslOK> on success. * * Throws: <eslESYS> if thread synchronization fails somewhere. * <eslEINVAL> if something's wrong with <queue>. */ int esl_workqueue_Reset(ESL_WORK_QUEUE *queue) { int inx; int queueSize; if (queue == NULL) ESL_EXCEPTION(eslEINVAL, "Invalid queue object"); if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed"); queueSize = queue->queueSize; /* move all buffers back to the reader queue */ while (queue->workerQueueCnt > 0) { inx = (queue->readerQueueHead + queue->readerQueueCnt) % queueSize; queue->readerQueue[inx] = queue->workerQueue[queue->workerQueueHead]; ++queue->readerQueueCnt; queue->workerQueue[queue->workerQueueHead] = NULL; queue->workerQueueHead = (queue->workerQueueHead + 1) % queueSize; --queue->workerQueueCnt; } queue->pendingWorkers = 0; if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed"); return eslOK; }
/* Function: esl_rmx_ValidateQ() * Incept: SRE, Sun Mar 11 10:30:50 2007 [Janelia] * * Purpose: Validates an instantaneous rate matrix <Q> for a * continuous-time Markov process, whose elements $q_{ij}$ * represent instantaneous transition rates $i \rightarrow * j$. * * Rows satisfy the condition that * $q_{ii} = -\sum_{i \neq j} q_{ij}$, and also * that $q_{ij} \geq 0$ for all $j \neq i$. * * <tol> specifies the floating-point tolerance to which * that condition must hold: <fabs(sum-q_ii) <= tol>. * * <errbuf> is an optional error message buffer. The caller * may pass <NULL> or a pointer to a buffer of at least * <eslERRBUFSIZE> characters. * * Args: Q - rate matrix to validate * tol - floating-point tolerance (0.00001, for example) * errbuf - OPTIONAL: ptr to an error buffer of at least * <eslERRBUFSIZE> characters. * * Returns: <eslOK> on successful validation. * <eslFAIL> on failure, and if a non-<NULL> <errbuf> was * provided by the caller, a message describing * the reason for the failure is put there. * * Throws: (no abnormal error conditions) */ int esl_rmx_ValidateQ(ESL_DMATRIX *Q, double tol, char *errbuf) { int i,j; double qi; if (Q->type != eslGENERAL) ESL_EXCEPTION(eslEINVAL, "Q must be type eslGENERAL to be validated"); if (Q->n != Q->m) ESL_EXCEPTION(eslEINVAL, "a rate matrix Q must be square"); for (i = 0; i < Q->n; i++) { qi = 0.; for (j = 0; j < Q->m; j++) { if (i != j) { if (Q->mx[i][j] < 0.) ESL_FAIL(eslFAIL, errbuf, "offdiag elem %d,%d < 0",i,j); qi += Q->mx[i][j]; } else { if (Q->mx[i][j] > 0.) ESL_FAIL(eslFAIL, errbuf, "diag elem %d,%d < 0", i,j); } } if (fabs(qi + Q->mx[i][i]) > tol) ESL_FAIL(eslFAIL, errbuf, "row %d does not sum to 0.0", i); } return eslOK; }
/* Function: esl_dmatrix_Copy() * * Purpose: Copies <src> matrix into <dest> matrix. <dest> must * be allocated already by the caller. * * You may copy to a matrix of a different type, so long as * the copy makes sense. If <dest> matrix is a packed type * and <src> is not, the values that should be zeros must * be zero in <src>, else the routine throws * <eslEINCOMPAT>. If the <src> matrix is a packed type and * <dest> is not, the values that are implicitly zeros are * set to zeros in the <dest> matrix. * * Returns: <eslOK> on success. * * Throws: <eslEINCOMPAT> if <src>, <dest> are different sizes, * or if their types differ and <dest> cannot represent * <src>. */ int esl_dmatrix_Copy(const ESL_DMATRIX *src, ESL_DMATRIX *dest) { int i,j; if (dest->n != src->n || dest->m != src->m) ESL_EXCEPTION(eslEINCOMPAT, "matrices of different size"); if (src->type == dest->type) /* simple case. */ memcpy(dest->mx[0], src->mx[0], src->ncells * sizeof(double)); else if (src->type == eslGENERAL && dest->type == eslUPPER) { for (i = 1; i < src->n; i++) for (j = 0; j < i; j++) if (src->mx[i][j] != 0.) ESL_EXCEPTION(eslEINCOMPAT, "general matrix isn't upper triangular, can't be copied/packed"); for (i = 0; i < src->n; i++) for (j = i; j < src->m; j++) dest->mx[i][j] = src->mx[i][j]; } else if (src->type == eslUPPER && dest->type == eslGENERAL) { for (i = 1; i < src->n; i++) for (j = 0; j < i; j++) dest->mx[i][j] = 0.; for (i = 0; i < src->n; i++) for (j = i; j < src->m; j++) dest->mx[i][j] = src->mx[i][j]; } return eslOK; }
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths; * instead we use the fact that E(i) is itself the necessary normalization * factor, and implement FChoose's algorithm here for an on-the-fly * calculation. */ static inline int select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k) { int Q = p7O_NQF(ox->M); float sum = 0.0; float roll = esl_random(rng); float norm = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E]; /* all M, D already scaled exactly the same */ __m128 xEv = _mm_set1_ps(norm); union { __m128 v; float p[4]; } u; int q,r; while (1) { for (q = 0; q < Q; q++) { u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_M], xEv); for (r = 0; r < 4; r++) { sum += u.p[r]; if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M;} } u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_D], xEv); for (r = 0; r < 4; r++) { sum += u.p[r]; if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D;} } } if (sum < 0.99) ESL_EXCEPTION(-1, "Probabilities weren't normalized - failed to trace back from an E"); } /*UNREACHED*/ ESL_EXCEPTION(-1, "unreached code was reached. universe collapses."); }
/* Function: esl_recorder_ResizeTo() * Synopsis: Reallocate an <ESL_RECORDER> for a new <maxlines> * Incept: SRE, Fri Dec 25 17:02:46 2009 [Casa de Gatos] * * Purpose: Reallocate the <ESL_RECORDER> <rc> to have a new * window size <maxlines>. * * The new <maxlines> may be more or less than the previous * window size for <rc>. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> if (re-)allocation fails. * * <eslEINVAL> if the recorder has a marked line (for start * of a block) and you try to shrink it so much that that * marked line would be lost. * * <eslEINCONCEIVABLE> on any baseline resetting problem; * this would have to be an internal error in the module. * * Note: We may have to repermute the line array, and reset its * baseline, as follows. * * In the growth case: if the line array is out of order * (circularly permuted) we must straighten it out, which * means resetting the baseline. * i.e. to grow 3 1 2 to nalloc=6, we need 1 2 3 x x x; * simple reallocation to 3 1 2 x x x doesn't work, * next read would make 3 4 2 x x x. * * In the shrinkage case: if the line array is in use beyond the * new array size, we set a new baseline to keep as much of the * old array as possible. * * i.e. for 6->3 * 1 2 3 x x x -> 1 2 3 * 1 2 3 4 x x -> 2 3 4 with new baseline=2. * 4 5 0 1 2 3 -> 3 4 5 with new baseline=3 */ int esl_recorder_ResizeTo(ESL_RECORDER *rc, int new_maxlines) { int idx; int newbase; void *tmp; int minlines; int status; if (new_maxlines == rc->nalloc) return eslOK; if (new_maxlines > rc->nalloc) /* growth case */ { if ((rc->nread - rc->baseline) / rc->nalloc != 0) /* array is permuted; reorder it */ { newbase = ESL_MAX(rc->baseline, rc->nread - rc->nalloc); status = recorder_new_baseline(rc, newbase); if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly"); } } else /* shrinkage case */ { /* check that the marked line (if any) will stay in window */ if (rc->markline >= 0) { minlines = rc->nread - rc->markline; if (new_maxlines < minlines) ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing marked line"); } /* check that current line will stay in window */ minlines = rc->nread - rc->ncurr + 1; if (new_maxlines < minlines) ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing current line"); if (rc->nread - rc->baseline > new_maxlines) /* baseline needs to move up */ { newbase = rc->nread - new_maxlines; status = recorder_new_baseline(rc, newbase); if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly"); } for (idx = new_maxlines; idx < rc->nalloc; idx++) if (rc->line[idx]) free(rc->line[idx]); } ESL_RALLOC(rc->line, tmp, sizeof(char *) * new_maxlines); ESL_RALLOC(rc->lalloc, tmp, sizeof(int) * new_maxlines); ESL_RALLOC(rc->offset, tmp, sizeof(off_t) * new_maxlines); for (idx = rc->nalloc; idx < new_maxlines; idx++) /* no-op in shrinkage case */ { rc->line[idx] = NULL; rc->lalloc[idx] = 0; rc->offset[idx] = 0; } rc->nalloc = new_maxlines; return eslOK; ERROR: return status; }
/* Function: p7_hmm_mpi_Send() * Synopsis: Send an HMM as an MPI work unit. * * Purpose: Sends an HMM <hmm> as a work unit to MPI process * <dest> (where <dest> ranges from 0..<nproc-1>), tagged * with MPI tag <tag>, for MPI communicator <comm>, as * the sole workunit or result. * * Work units are prefixed by a status code indicating the * number of HMMs sent. If <hmm> is <NULL>, this code is 0, * and <_Recv()> interprets such a unit as an EOD * (end-of-data) signal, a signal to cleanly shut down * worker processes. * * In order to minimize alloc/free cycles in this routine, * caller passes a pointer to a working buffer <*buf> of * size <*nalloc> characters. If necessary (i.e. if <hmm> is * too big to fit), <*buf> will be reallocated and <*nalloc> * increased to the new size. As a special case, if <*buf> * is <NULL> and <*nalloc> is 0, the buffer will be * allocated appropriately, but the caller is still * responsible for free'ing it. * * Returns: <eslOK> on success; <*buf> may have been reallocated and * <*nalloc> may have been increased. * * Throws: <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc * fails. In either case, <*buf> and <*nalloc> remain valid and useful * memory (though the contents of <*buf> are undefined). * * Note: Compare to p7_hmmfile_WriteBinary(). The two operations (sending * an HMM via MPI, or saving it as a binary file to disk) are * similar. */ int p7_hmm_mpi_Send(const P7_HMM *hmm, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc) { int n = 0; int code; int sz, pos; int status; /* Figure out size. We always send at least a status code (0=EOD=nothing sent) */ if ( MPI_Pack_size(1, MPI_INT, comm, &sz) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack size failed"); n += sz; if ((status = p7_hmm_mpi_PackSize(hmm, comm, &sz)) != eslOK) return status; n += sz; /* Make sure the buffer is allocated appropriately */ if (*buf == NULL || n > *nalloc) { ESL_REALLOC(*buf, sizeof(char) * n); *nalloc = n; } /* Pack the status code and HMM into the buffer */ /* The status code is the # of HMMs being sent as one MPI message; here 1 or 0 */ pos = 0; code = (hmm ? 1 : 0); if (MPI_Pack(&code, 1, MPI_INT, *buf, n, &pos, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack failed"); if (hmm && (status = p7_hmm_mpi_Pack(hmm, *buf, n, &pos, comm)) != eslOK) return status; /* Send the packed HMM to the destination. */ if (MPI_Send(*buf, n, MPI_PACKED, dest, tag, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi send failed"); return eslOK; ERROR: return status; }
/* Function: p7_hmm_mpi_Recv() * Synopsis: Receives an HMM as a work unit from an MPI sender. * * Purpose: Receive a work unit that consists of a single HMM * sent by MPI <source> (<0..nproc-1>, or * <MPI_ANY_SOURCE>) tagged as <tag> for MPI communicator <comm>. * * Work units are prefixed by a status code that gives the * number of HMMs to follow; here, 0 or 1 (but in the future, * we could easily extend to sending several HMMs in one * packed buffer). If we receive a 1 code and we successfully * unpack an HMM, this routine will return <eslOK> and a non-<NULL> <*ret_hmm>. * If we receive a 0 code (a shutdown signal), * this routine returns <eslEOD> and <*ret_hmm> is <NULL>. * * Caller provides a working buffer <*buf> of size * <*nalloc> characters. These are passed by reference, so * that <*buf> can be reallocated and <*nalloc> increased * if necessary. As a special case, if <*buf> is <NULL> and * <*nalloc> is 0, the buffer will be allocated * appropriately, but the caller is still responsible for * free'ing it. * * Caller may or may not already know what alphabet the HMM * is expected to be in. A reference to the current * alphabet is passed in <byp_abc>. If the alphabet is unknown, * pass <*byp_abc = NULL>, and when the HMM is received, an * appropriate new alphabet object is allocated and passed * back to the caller via <*abc>. If the alphabet is * already known, <*byp_abc> is that alphabet, and the new * HMM's alphabet type is verified to agree with it. This * mechanism allows an application to let the first HMM * determine the alphabet type for the application, while * still keeping the alphabet under the application's scope * of control. * * Args: source - index of MPI sender, 0..nproc-1 (0=master), or MPI_ANY_SOURCE * tag - MPI message tag; MPI_ANY_TAG, or a specific message tag (0..32767 will work on any MPI) * comm - MPI communicator; MPI_COMM_WORLD, or a specific MPI communicator * buf - working buffer (for receiving packed message); * if <*buf> == NULL, a <*buf> is allocated and returned; * if <*buf> != NULL, it is used (and may be reallocated) * nalloc - allocation size of <*buf> in bytes; pass 0 if <*buf==NULL>. * byp_abc - BYPASS: <*byp_abc> == ESL_ALPHABET *> if known; * <*byp_abc> == NULL> if alphabet unknown. * ret_hmm - RETURN: newly allocated/received profile * * Returns: <eslOK> on success. <*ret_hmm> contains the received HMM; * it is allocated here, and the caller is responsible for * free'ing it. <*buf> may have been reallocated to a * larger size, and <*nalloc> may have been increased. If * <*abc> was passed as <NULL>, it now points to an * <ESL_ALPHABET> object that was allocated here; caller is * responsible for free'ing this. * * Returns <eslEOD> if an end-of-data signal was received. * In this case, <*buf>, <*nalloc>, and <*abc> are left unchanged, * and <*ret_hmm> is <NULL>. * * Returns <eslEINCOMPAT> if the HMM is in a different alphabet * than <*abc> said to expect. In this case, <*abc> is unchanged, * <*buf> and <*nalloc> may have been changed, and <*ret_hmm> is * <NULL>. * * Throws: <eslEMEM> on allocation error, and <eslESYS> on MPI communication * errors; in either case <*ret_hmm> is <NULL>. */ int p7_hmm_mpi_Recv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **byp_abc, P7_HMM **ret_hmm) { int pos = 0; int code; int n; MPI_Status mpistatus; int status; /* Probe first, because we need to know if our buffer is big enough. */ if ( MPI_Probe(source, tag, comm, &mpistatus) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi probe failed"); if ( MPI_Get_count(&mpistatus, MPI_PACKED, &n) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi get count failed"); /* Make sure the buffer is allocated appropriately */ if (*buf == NULL || n > *nalloc) { ESL_REALLOC(*buf, sizeof(char) * n); *nalloc = n; } /* Receive the entire packed work unit */ if (MPI_Recv(*buf, n, MPI_PACKED, source, tag, comm, &mpistatus) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi recv failed"); /* Unpack the status code prefix */ if (MPI_Unpack(*buf, n, &pos, &code, 1, MPI_INT, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi unpack failed"); if (code == 0) { status = eslEOD; *ret_hmm = NULL; } else if (code == 1) status = p7_hmm_mpi_Unpack(*buf, *nalloc, &pos, comm, byp_abc, ret_hmm); else ESL_EXCEPTION(eslESYS, "bad mpi buffer transmission code"); return status; ERROR: /* from ESL_REALLOC only */ *ret_hmm = NULL; return status; }
/* tau_by_moments() * * Obtain an initial estimate for tau by * matching moments. Also returns mean and * logsum, which we need for ML fitting. * To obtain a lambda estimate, use * lambda = tau / mean. */ static int tau_by_moments(double *x, int n, double mu, double *ret_tau, double *ret_mean, double *ret_logsum) { int i; double mean, var, logsum; mean = var = logsum = 0.; for (i = 0; i < n; i++) { if (x[i] < mu) ESL_EXCEPTION(eslEINVAL, "No x[i] can be < mu in gamma data"); mean += x[i] - mu; /* mean is temporarily just the sum */ logsum += log(x[i] - mu); var += (x[i]-mu)*(x[i]-mu); /* var is temporarily the sum of squares */ } var = (var - mean*mean/(double)n) / ((double)n-1); /* now var is the variance */ mean /= (double) n; /* and now mean is the mean */ logsum /= (double) n; if (var == 0.) /* and if mean = 0, var = 0 anyway. */ ESL_EXCEPTION(eslEINVAL, "Zero variance in allegedly gamma-distributed dataset"); if (ret_tau != NULL) *ret_tau = mean * mean / var; if (ret_mean != NULL) *ret_mean = mean; if (ret_logsum != NULL) *ret_logsum = logsum; return eslOK; }
/* Function: esl_workqueue_Remove() * Synopsis: Removes a queued object from the producers list. * Incept: MSF, Thu Jun 18 11:51:39 2009 * * Purpose: Removes a queued object from the producers list. * * A object <void> that has already been consumed by a worker * is removed the the producers list. If there are no empty * objects, a <obj> is set to NULL. * * The pointer to the object is returned in the obj arguement. * * Returns: <eslOK> on success. * <eslEOD> if no objects are in the queue. * * Throws: <eslESYS> if thread synchronization fails somewhere. * <eslEINVAL> if something's wrong with <queue>. */ int esl_workqueue_Remove(ESL_WORK_QUEUE *queue, void **obj) { int inx; int status = eslEOD; if (obj == NULL) ESL_EXCEPTION(eslEINVAL, "Invalid object pointer"); if (queue == NULL) ESL_EXCEPTION(eslEINVAL, "Invalid queue object"); if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed"); /* check if there are any items on the readers list */ *obj = NULL; if (queue->readerQueueCnt > 0) { inx = (queue->readerQueueHead + queue->readerQueueCnt) % queue->queueSize; *obj = queue->readerQueue[inx]; queue->readerQueue[inx] = NULL; --queue->readerQueueCnt; status = eslOK; } if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed"); return status; }
/* Function: esl_msashuffle_Shuffle() * Synopsis: Shuffle an alignment's columns. * * Purpose: Returns a column-shuffled version of <msa> in <shuf>, * using random generator <r>. Shuffling by columns * preserves the \% identity of the original * alignment. <msa> and <shuf> can be identical, to shuffle * in place. * * The caller sets up the rest of the data (everything but * the alignment itself) in <shuf> the way it wants, * including sequence names, MSA name, and other * annotation. The easy thing to do is to make <shuf> * a copy of <msa>: the caller might create <shuf> by * a call to <esl_msa_Clone()>. * * The alignments <msa> and <shuf> can both be in digital * mode, or can both be in text mode; you cannot mix * digital and text modes. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if <msa>,<shuf> aren't in the same mode (digital vs. text). */ int esl_msashuffle_Shuffle(ESL_RANDOMNESS *r, ESL_MSA *msa, ESL_MSA *shuf) { int i, pos, alen; if (! (msa->flags & eslMSA_DIGITAL)) { char c; if (shuf->flags & eslMSA_DIGITAL) ESL_EXCEPTION(eslEINVAL, "<shuf> must be in text mode if <msa> is"); if (msa != shuf) { for (i = 0; i < msa->nseq; i++) strcpy(shuf->aseq[i], msa->aseq[i]); } for (i = 0; i < msa->nseq; i++) shuf->aseq[i][msa->alen] = '\0'; for (alen = msa->alen; alen > 1; alen--) { pos = esl_rnd_Roll(r, alen); for (i = 0; i < msa->nseq; i++) { c = msa->aseq[i][pos]; shuf->aseq[i][pos] = shuf->aseq[i][alen-1]; shuf->aseq[i][alen-1] = c; } } } #ifdef eslAUGMENT_ALPHABET else { ESL_DSQ x; if (! (shuf->flags & eslMSA_DIGITAL)) ESL_EXCEPTION(eslEINVAL, "<shuf> must be in digital mode if <msa> is"); if (msa != shuf) { for (i = 0; i < msa->nseq; i++) memcpy(shuf->ax[i], msa->ax[i], (msa->alen + 2) * sizeof(ESL_DSQ)); } for (i = 0; i < msa->nseq; i++) shuf->ax[i][msa->alen+1] = eslDSQ_SENTINEL; for (alen = msa->alen; alen > 1; alen--) { pos = esl_rnd_Roll(r, alen) + 1; for (i = 0; i < msa->nseq; i++) { x = msa->ax[i][pos]; shuf->ax[i][pos] = shuf->ax[i][alen]; shuf->ax[i][alen] = x; } } } #endif /*eslAUGMENT_ALPHABET*/ return eslOK; }
/* Function: esl_hyperexp_Write() * * Purpose: Write hyperexponential parameters from <hxp> to an open <fp>. * * The output format is suitable for input by <esl_hyperexp_Read()>. * * Returns: <eslOK> on success. * * Throws: <eslEWRITE> on any write error. */ int esl_hyperexp_Write(FILE *fp, ESL_HYPEREXP *hxp) { int k; if (fprintf(fp, "%8d # number of components\n", hxp->K) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed"); if (fprintf(fp, "%8.2f # mu (for all components)\n", hxp->mu) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed"); for (k = 0; k < hxp->K; k++) if (fprintf(fp, "%8.6f %12.6f # q[%d], lambda[%d]\n", hxp->q[k], hxp->lambda[k], k, k) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed"); return eslOK; }
/* Function: p7_Forward() * Synopsis: The Forward algorithm, full matrix fill version. * Incept: SRE, Fri Aug 15 18:59:43 2008 [Casa de Gatos] * * Purpose: Calculates the Forward algorithm for sequence <dsq> of * length <L> residues, using optimized profile <om>, and a * preallocated DP matrix <ox>. Upon successful return, <ox> * contains the filled Forward matrix, and <*opt_sc> * optionally contains the raw Forward score in nats. * * This calculation requires $O(ML)$ memory and time. * The caller must provide a suitably allocated full <ox> * by calling <ox = p7_omx_Create(M, L, L)> or * <p7_omx_GrowTo(ox, M, L, L)>. * * The model <om> must be configured in local alignment * mode. The sparse rescaling method used to keep * probability values within single-precision floating * point dynamic range cannot be safely applied to models in * glocal or global mode. * * Args: dsq - digital target sequence, 1..L * L - length of dsq in residues * om - optimized profile * ox - RETURN: Forward DP matrix * opt_sc - RETURN: Forward score (in nats) * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile * isn't in local alignment mode. * <eslERANGE> if the score exceeds the limited range of * a probability-space odds ratio. * In either case, <*opt_sc> is undefined. */ int p7_Forward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc) { #ifdef p7_DEBUGGING if (om->M > ox->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)"); if (L >= ox->validR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)"); if (L >= ox->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)"); if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment"); #endif return forward_engine(TRUE, dsq, L, om, ox, opt_sc); }
/* Function: p7_oprofile_ReadRest() * Synopsis: Read the rest of an optimized profile. * * Purpose: Read the rest of an optimized profile <om> from * the <.h3p> file associated with an open HMM * file <hfp>. * * This is the second part of a two-part calling sequence. * The <om> here must be the result of a previous * successful <p7_oprofile_ReadMSV()> call on the same * open <hfp>. * * Args: hfp - open HMM file, from which we've previously * called <p7_oprofile_ReadMSV()>. * om - optimized profile that was successfully * returned by <p7_oprofile_ReadMSV()>. * * Returns: <eslOK> on success, and <om> is now a complete * optimized profile. * * Returns <eslEFORMAT> if <hfp> has no <.h3p> file open, * or on any parsing error, and set <hfp->errbuf> to * an informative error message. * * Throws: <eslESYS> if an <fseek()> fails to reposition the * binary <.h3p> file. * * <eslEMEM> on allocation error. */ int p7_oprofile_ReadRest(P7_HMMFILE *hfp, P7_OPROFILE *om) { uint32_t magic; int M; int alphatype; int status = eslOK; #ifdef HMMER_THREADS /* lock the mutex to prevent other threads from reading from the optimized * profile at the same time. */ if (hfp->syncRead) { if (pthread_mutex_lock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed"); } #endif if (! fread( (char *) &magic, sizeof(uint32_t), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read magic"); if (magic == v3a_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/a); please hmmpress your HMM file again"); if (magic == v3b_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/b); please hmmpress your HMM file again"); if (magic == v3c_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/c); please hmmpress your HMM file again"); if (magic == v3d_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/d); please hmmpress your HMM file again"); if (magic == v3e_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "binary auxfiles are in an outdated HMMER format (3/e); please hmmpress your HMM file again"); if (magic != v3f_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad magic; not an HMM database file?"); if (! fread( (char *) &M, sizeof(int), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read model size M"); if (! fread( (char *) &alphatype, sizeof(int), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "failed to read alphabet type"); if (! fread( (char *) &magic, sizeof(uint32_t), 1, hfp->pfp)) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "no sentinel magic: .h3p file corrupted?"); if (magic != v3f_pmagic) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "bad sentinel magic: .h3p file corrupted?"); if (M != om->M) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f model length mismatch"); if (alphatype != om->abc->type) ESL_XFAIL(eslEFORMAT, hfp->errbuf, "p/f alphabet type mismatch"); #ifdef HMMER_THREADS if (hfp->syncRead) { if (pthread_mutex_unlock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed"); } #endif return eslOK; ERROR: #ifdef HMMER_THREADS if (hfp->syncRead) { if (pthread_mutex_unlock (&hfp->readMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed"); } #endif return status; }
/* Function: p7_oprofile_Position() * Synopsis: Reposition an open hmm file to an offset. * Incept: MSF, Thu Oct 15, 2009 [Janelia] * * Purpose: Reposition an open <hfp> to offset <offset>. * <offset> would usually be the first byte of a * desired hmm record. * * Returns: <eslOK> on success; * <eslEOF> if no data can be read from this position. * * Throws: <eslEINVAL> if the <sqfp> is not positionable. * <eslEFORMAT> if no msv profile opened. * <eslESYS> if the fseeko() call fails. */ int p7_oprofile_Position(P7_HMMFILE *hfp, off_t offset) { if (hfp->ffp == NULL) ESL_EXCEPTION(eslEFORMAT, hfp->errbuf, "no MSV profile file; hmmpress probably wasn't run"); if (hfp->do_stdin) ESL_EXCEPTION(eslEINVAL, "can't Position() in standard input"); if (hfp->do_gzip) ESL_EXCEPTION(eslEINVAL, "can't Position() in a gzipped file"); if (offset < 0) ESL_EXCEPTION(eslEINVAL, "bad offset"); if (fseeko(hfp->ffp, offset, SEEK_SET) != 0) ESL_EXCEPTION(eslESYS, "fseeko() failed"); return eslOK; }
/* Function: p7_BackwardParser() * Synopsis: The Backward algorithm, linear memory parsing version. * Incept: SRE, Sat Aug 16 08:34:13 2008 [Janelia] * * Purpose: Same as <p7_Backward()> except that the full matrix isn't * kept. Instead, a linear $O(M+L)$ memory algorithm is * used, keeping only the DP matrix values for the special * (BENCJ) states. These are sufficient to do posterior * decoding to identify high-probability regions where * domains are. * * The caller must provide a suitably allocated "parsing" * <bck> by calling <bck = p7_omx_Create(M, 0, L)> or * <p7_omx_GrowTo(bck, M, 0, L)>. * * Args: dsq - digital target sequence, 1..L * L - length of dsq in residues * om - optimized profile * fwd - filled Forward DP matrix, for scale factors * bck - RETURN: filled Backward matrix * opt_sc - optRETURN: Backward score (in nats) * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile * isn't in local alignment mode. * <eslERANGE> if the score exceeds the limited range of * a probability-space odds ratio. * In either case, <*opt_sc> is undefined. */ int p7_BackwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc) { #ifdef p7_DEBUGGING if (om->M > bck->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)"); if (bck->validR < 1) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)"); if (L >= bck->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)"); if (L != fwd->L) ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L"); if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment"); #endif return backward_engine(FALSE, dsq, L, om, fwd, bck, opt_sc); }
/* Function: esl_workqueue_Complete() * Synopsis: Signals the end of the queue. * Incept: MSF, Thu Jun 18 11:51:39 2009 * * Purpose: Signal the end of the queue. If there are any threads * waiting on an object, signal them to wake up and complete * their processing. * * Returns: <eslOK> on success. * * Throws: <eslESYS> if thread synchronization fails somewhere. * <eslEINVAL> if something's wrong with <queue>. */ int esl_workqueue_Complete(ESL_WORK_QUEUE *queue) { if (queue == NULL) ESL_EXCEPTION(eslEINVAL, "Invalid queue object"); if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed"); if (queue->pendingWorkers != 0) { if (pthread_cond_broadcast (&queue->workerQueueCond) != 0) ESL_EXCEPTION(eslESYS, "broadcast failed"); } if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed"); return eslOK; }
/* Function: p7_emit_FancyConsensus() * Synopsis: Emit a fancier consensus with upper/lower case and N/X's. * Incept: SRE, Fri May 14 09:33:10 2010 [Janelia] * * Purpose: Generate a consensus sequence for model <hmm>, consisting * of the maximum probability residue in each match state; * store this sequence in text-mode <sq> provided by the caller. * * If the probability of the consensus residue is less than * <min_lower>, show an ``any'' residue (N or X) instead. * If the probability of the consensus residue is $\geq$ * <min_lower> and less than <min_upper>, show the residue * as lower case; if it is $\geq$ <min_upper>, show it as * upper case. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if the <sq> isn't in text mode. * * Xref: SRE:J6/59. */ int p7_emit_FancyConsensus(const P7_HMM *hmm, float min_lower, float min_upper, ESL_SQ *sq) { int k, x; float p; char c; int status; if (! esl_sq_IsText(sq)) ESL_EXCEPTION(eslEINVAL, "p7_emit_FancyConsensus() expects a text-mode <sq>"); if ((status = esl_sq_GrowTo(sq, hmm->M)) != eslOK) return status; for (k = 1; k <= hmm->M; k++) { if (hmm->mm && hmm->mm[k] == 'm') { //masked position, spit out the degenerate code if ((status = esl_sq_CAddResidue(sq, tolower(esl_abc_CGetUnknown(hmm->abc))) ) != eslOK) return status; } else { p = esl_vec_FMax( hmm->mat[k], hmm->abc->K); x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K); if (p < min_lower) c = tolower(esl_abc_CGetUnknown(hmm->abc)); else if (p >= min_upper) c = toupper(hmm->abc->sym[x]); else c = tolower(hmm->abc->sym[x]); if ((status = esl_sq_CAddResidue(sq, c)) != eslOK) return status; } } if ((status = esl_sq_CAddResidue(sq, '\0')) != eslOK) return status; return eslOK; }
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths; * instead we use the fact that E(i) is itself the necessary normalization * factor, and implement FChoose's algorithm here for an on-the-fly * calculation. */ static inline int select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k) { int Q = p7O_NQF(ox->M); double sum = 0.0; double roll = esl_random(rng); double norm = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E]; /* all M, D already scaled exactly the same */ vector float xEv = esl_vmx_set_float(norm); vector float zerov = (vector float) vec_splat_u32(0); union { vector float v; float p[4]; } u; int q,r; while (1) { for (q = 0; q < Q; q++) { u.v = vec_madd(ox->dpf[i][q*3 + p7X_M], xEv, zerov); for (r = 0; r < 4; r++) { sum += u.p[r]; if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M;} } u.v = vec_madd(ox->dpf[i][q*3 + p7X_D], xEv, zerov); for (r = 0; r < 4; r++) { sum += u.p[r]; if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D;} } } ESL_DASSERT1(sum > 0.99); } /*UNREACHED*/ ESL_EXCEPTION(-1, "unreached code was reached. universe collapses."); }
/* Function: esl_stats_Psi() * Synopsis: Calculates $\Psi(x)$ (the digamma function). * Incept: SRE, Tue Nov 15 13:57:59 2005 [St. Louis] * * Purpose: Computes $\Psi(x)$ (the "digamma" function), which is * the derivative of log of the Gamma function: * $d/dx \log \Gamma(x) = \frac{\Gamma'(x)}{\Gamma(x)} = \Psi(x)$. * Argument $x$ is $> 0$. * * This is J.M. Bernardo's "Algorithm AS103", * Appl. Stat. 25:315-317 (1976). */ int esl_stats_Psi(double x, double *ret_answer) { double answer = 0.; double x2; if (x <= 0.0) ESL_EXCEPTION(eslERANGE, "invalid x <= 0 in esl_stats_Psi()"); /* For small x, Psi(x) ~= -0.5772 - 1/x + O(x), we're done. */ if (x <= 1e-5) { *ret_answer = -eslCONST_EULER - 1./x; return eslOK; } /* For medium x, use Psi(1+x) = \Psi(x) + 1/x to c.o.v. x, * big enough for Stirling approximation to work... */ while (x < 8.5) { answer = answer - 1./x; x += 1.; } /* For large X, use Stirling approximation */ x2 = 1./x; answer += log(x) - 0.5 * x2; x2 = x2*x2; answer -= (1./12.)*x2; answer += (1./120.)*x2*x2; answer -= (1./252.)*x2*x2*x2; *ret_answer = answer; return eslOK; }
/* Function: esl_exp_FitCompleteBinned() * Incept: SRE, Sun Aug 21 13:07:22 2005 [St. Louis] * * Purpose: Fit a complete exponential distribution to the observed * binned data in a histogram <g>, where each * bin i holds some number of observed samples x with values from * lower bound l to upper bound u (that is, $l < x \leq u$); * find maximum likelihood parameters $\mu,\lambda$ and * return them in <*ret_mu>, <*ret_lambda>. * * If the binned data in <g> were set to focus on * a tail by virtual censoring, the "complete" exponential is * fitted to this tail. The caller then also needs to * remember what fraction of the probability mass was in this * tail. * * The ML estimate for $mu$ is the smallest observed * sample. For complete data, <ret_mu> is generally set to * the smallest observed sample value, except in the * special case of a "rounded" complete dataset, where * <ret_mu> is set to the lower bound of the smallest * occupied bin. For tails, <ret_mu> is set to the cutoff * threshold <phi>, where we are guaranteed that <phi> is * at the lower bound of a bin (by how the histogram * object sets tails). * * The ML estimate for <ret_lambda> has an analytical * solution, so this routine is fast. * * If all the data are in one bin, the ML estimate of * $\lambda$ will be $\infty$. This is mathematically correct, * but is probably a situation the caller wants to avoid, perhaps * by choosing smaller bins. * * This function currently cannot fit an exponential tail * to truly censored, binned data, because it assumes that * all bins have equal width, but in true censored data, the * lower cutoff <phi> may fall anywhere in the first bin. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if dataset is true-censored. */ int esl_exp_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda) { int i; double ai, bi, delta; double sa, sb; double mu = 0.; if (g->dataset_is == ESL_HISTOGRAM::COMPLETE) { if (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin); else mu = g->xmin; } else if (g->dataset_is == ESL_HISTOGRAM::VIRTUAL_CENSORED) /* i.e., we'll fit to tail */ mu = g->phi; else if (g->dataset_is == ESL_HISTOGRAM::TRUE_CENSORED) ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset"); delta = g->w; sa = sb = 0.; for (i = g->cmin; i <= g->imax; i++) /* for each occupied bin */ { if (g->obs[i] == 0) continue; ai = esl_histogram_Bin2LBound(g,i); bi = esl_histogram_Bin2UBound(g,i); sa += g->obs[i] * (ai-mu); sb += g->obs[i] * (bi-mu); } *ret_mu = mu; *ret_lambda = 1/delta * (log(sb) - log(sa)); return eslOK; }
/* Function: esl_rmx_E2Q() * Incept: SRE, Tue Jul 13 15:52:41 2004 [St. Louis] * * Purpose: Given a lower triangular matrix ($j<i$) of * residue exchangeabilities <E>, and a stationary residue * frequency vector <pi>; assuming $E_{ij} = E_{ji}$; * calculates a rate matrix <Q> as * * $Q_{ij} = E_{ij} * \pi_j$ * * The resulting <Q> is not normalized to any particular * number of substitutions/site/time unit. See * <esl_rmx_ScaleTo()> for that. * * Args: E - symmetric residue "exchangeabilities"; * only lower triangular entries are used. * pi - residue frequencies at stationarity. * Q - RETURN: rate matrix, square (NxN). * Caller allocates the memory for this. * * Returns: <eslOK> on success; Q is calculated and filled in. * * Xref: STL8/p56. */ int esl_rmx_E2Q(ESL_DMATRIX *E, double *pi, ESL_DMATRIX *Q) { int i,j; if (E->n != Q->n) ESL_EXCEPTION(eslEINVAL, "E and Q sizes differ"); /* Scale all off-diagonals to pi[j] * E[i][j]. */ for (i = 0; i < E->n; i++) for (j = 0; j < i; j++) /* only look at lower triangle of E. */ { Q->mx[i][j] = pi[j] * E->mx[i][j]; Q->mx[j][i] = pi[i] * E->mx[i][j]; } /* Set diagonal to -\sum of all j != i. */ for (i = 0; i < Q->n; i++) { Q->mx[i][i] = 0.; /* makes the vector sum work for j != i */ Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], Q->n); } return eslOK; }
int profillic_eslx_msafile_Read(ESLX_MSAFILE *afp, ESL_MSA **ret_msa, ProfileType * profile_ptr) { ESL_MSA *msa = NULL; int status = eslOK; #ifdef eslAUGMENT_SSI esl_pos_t offset = esl_buffer_GetOffset(afp->bf); #endif switch (afp->format) { case eslMSAFILE_A2M: if ((status = esl_msafile_a2m_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_AFA: if ((status = esl_msafile_afa_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_CLUSTAL: if ((status = esl_msafile_clustal_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_CLUSTALLIKE: if ((status = esl_msafile_clustal_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_PFAM: if ((status = esl_msafile_stockholm_Read(afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_PHYLIP: if ((status = esl_msafile_phylip_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_PHYLIPS: if ((status = esl_msafile_phylip_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_PSIBLAST: if ((status = esl_msafile_psiblast_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_SELEX: if ((status = esl_msafile_selex_Read (afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_STOCKHOLM: if ((status = esl_msafile_stockholm_Read(afp, &msa)) != eslOK) goto ERROR; break; case eslMSAFILE_PROFILLIC: if ((status = profillic_esl_msafile_profile_Read(afp, &msa, profile_ptr)) != eslOK) goto ERROR; break; default: ESL_EXCEPTION(eslEINCONCEIVABLE, "no such msa file format"); break; } #ifdef eslAUGMENT_SSI msa->offset = offset; #endif *ret_msa = msa; return eslOK; ERROR: if (msa) esl_msa_Destroy(msa); *ret_msa = NULL; return status; }
/* Function: esl_randomness_Init() * Synopsis: Reinitialize an RNG. * Incept: SRE, Wed Jul 14 13:13:05 2004 [St. Louis] * * Purpose: Reset and reinitialize an existing <ESL_RANDOMNESS> * object. * * (Not generally recommended. This does not make a * sequence of numbers more random, and may make it less * so.) * * Args: r - randomness object * seed - new seed to use; >0. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if seed is $<= 0$. * * Xref: STL8/p57. */ int esl_randomness_Init(ESL_RANDOMNESS *r, long seed) { int burnin = 7; if (seed <= 0) ESL_EXCEPTION(eslEINVAL, "bad seed"); r->seed = seed; while (burnin--) esl_random(r); return eslOK; }
/* Function: esl_recorder_MarkBlock() * Synopsis: Mark first line to be saved in a block. * Incept: SRE, Fri Jan 1 11:13:53 2010 [Magallon] * * Purpose: Mark line number <markline> (0..N-1) in a file being read * through the <ESL_RECORDER> <rc> as the first line in a * block of lines to be parsed later, when the end of * the block is found. * * This mark makes sure that the <ESL_RECORDER> will keep * the entire block of lines in memory, starting at or * before the mark. When a mark is active, * <esl_recorder_Read()> will reallocate and grow the * recorder as necessary, rather than overwriting the mark. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if the <markline> has already passed out * of the recorder's memory. */ int esl_recorder_MarkBlock(ESL_RECORDER *rc, int markline) { int line0 = ESL_MAX(rc->baseline, rc->nread - rc->nalloc); if (markline < line0) ESL_EXCEPTION(eslEINVAL, "recorder window already passed marked line"); rc->markline = markline; return eslOK; }
/* Function: esl_rmx_SetWAG() * Incept: SRE, Thu Mar 8 18:00:00 2007 [Janelia] * * Purpose: Sets a $20 \times 20$ rate matrix <Q> to WAG parameters. * The caller allocated <Q>. * * If <pi> is non-<NULL>, it provides a vector of 20 amino * acid stationary probabilities in Easel alphabetic order, * A..Y, and the WAG stationary probabilities are set to * these desired $\pi_i$. If <pi> is <NULL>, the default * WAG stationary probabilities are used. * * The WAG parameters are a maximum likelihood * parameterization obtained by Whelan and Goldman * \citep{WhelanGoldman01}. * * Note: The data table was reformatted from wag.dat by the UTILITY1 * executable in the paml module. The wag.dat file was obtained from * \url{http://www.ebi.ac.uk/goldman/WAG/wag.dat}. A copy * is in formats/wag.dat. * * Args: Q - a 20x20 rate matrix to set, allocated by caller. * pi - desired stationary probabilities A..Y, or * NULL to use WAG defaults. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if <Q> isn't a 20x20 general matrix; and * the state of <Q> is undefined. */ int esl_rmx_SetWAG(ESL_DMATRIX *Q, double *pi) { static double wagE[190] = { 1.027040, 0.738998, 0.030295, 1.582850, 0.021352, 6.174160, 0.210494, 0.398020, 0.046730, 0.081134, 1.416720, 0.306674, 0.865584, 0.567717, 0.049931, 0.316954, 0.248972, 0.930676, 0.570025, 0.679371, 0.249410, 0.193335, 0.170135, 0.039437, 0.127395, 1.059470, 0.030450, 0.138190, 0.906265, 0.074034, 0.479855, 2.584430, 0.088836, 0.373558, 0.890432, 0.323832, 0.397915, 0.384287, 0.084805, 0.154263, 2.115170, 0.061304, 0.499462, 3.170970, 0.257555, 0.893496, 0.390482, 0.103754, 0.315124, 1.190630, 0.174100, 0.404141, 4.257460, 0.934276, 4.854020, 0.509848, 0.265256, 5.429420, 0.947198, 0.096162, 1.125560, 3.956290, 0.554236, 3.012010, 0.131528, 0.198221, 1.438550, 0.109404, 0.423984, 0.682355, 0.161444, 0.243570, 0.696198, 0.099929, 0.556896, 0.415844, 0.171329, 0.195081, 0.908598, 0.098818, 0.616783, 5.469470, 0.099921, 0.330052, 4.294110, 0.113917, 3.894900, 0.869489, 1.545260, 1.543640, 0.933372, 0.551571, 0.528191, 0.147304, 0.439157, 0.102711, 0.584665, 2.137150, 0.186979, 5.351420, 0.497671, 0.683162, 0.635346, 0.679489, 3.035500, 3.370790, 1.407660, 1.071760, 0.704939, 0.545931, 1.341820, 0.740169, 0.319440, 0.967130, 0.344739, 0.493905, 3.974230, 1.613280, 1.028870, 1.224190, 2.121110, 0.512984, 0.374866, 0.822765, 0.171903, 0.225833, 0.473307, 1.458160, 1.386980, 0.326622, 1.516120, 2.030060, 0.795384, 0.857928, 0.554413, 4.378020, 2.006010, 1.002140, 0.152335, 0.588731, 0.649892, 0.187247, 0.118358, 7.821300, 0.305434, 1.800340, 2.058450, 0.196246, 0.314887, 0.301281, 0.251849, 0.232739, 1.388230, 0.113133, 0.717070, 0.129767, 0.156557, 1.529640, 0.336983, 0.262569, 0.212483, 0.137505, 0.665309, 0.515706, 0.071917, 0.139405, 0.215737, 1.163920, 0.523742, 0.110864, 0.365369, 0.240735, 0.543833, 0.325711, 0.196303, 6.454280, 0.103604, 3.873440, 0.420170, 0.133264, 0.398618, 0.428437, 1.086000, 0.216046, 0.227710, 0.381533, 0.786993, 0.291148, 0.314730, 2.485390}; static double wagpi[20]; int i,j,z; if (Q->m != 20 || Q->n != 20 || Q->type != eslGENERAL) ESL_EXCEPTION(eslEINVAL, "Q must be a 20x20 general matrix"); esl_composition_WAG(wagpi); /* 1. Transfer the wag E lower triagonal matrix directly into Q. */ z = 0; for (i = 0; i < 20; i++) { Q->mx[i][i] = 0.; /* code below depends on this zero initialization */ for (j = 0; j < i; j++) { Q->mx[i][j] = wagE[z++]; Q->mx[j][i] = Q->mx[i][j]; } } /* 2. Set offdiagonals Q_ij = E_ij * pi_j */ for (i = 0; i < 20; i++) for (j = 0; j < 20; j++) if (pi != NULL) Q->mx[i][j] *= pi[j]; else Q->mx[i][j] *= wagpi[j]; /* 3. Set diagonal Q_ii to -\sum_{i \neq j} Q_ij */ for (i = 0; i < 20; i++) Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 20); /* 4. Renormalize matrix to units of 1 substitution/site. */ if (pi != NULL) esl_rmx_ScaleTo(Q, pi, 1.0); else esl_rmx_ScaleTo(Q, wagpi, 1.0); return eslOK; }
/* Function: esl_msashuffle_Bootstrap() * Synopsis: Bootstrap sample an MSA. * Incept: SRE, Tue Jan 22 11:05:07 2008 [Janelia] * * Purpose: Takes a bootstrap sample of <msa> (sample <alen> columns, * with replacement) and puts it in <bootsample>, using * random generator <r>. * * The caller provides allocated space for <bootsample>. * It must be different space than <msa>; you cannot take * a bootstrap sample "in place". The caller sets up the * rest of the data in <bootsample> (everything but the * alignment itself) the way it wants, including sequence * names, MSA name, and other annotation. The easy thing to * do is to initialize <bootsample> by cloning <msa>. * * The alignments <msa> and <bootsample> can both be in digital * mode, or can both be in text mode; you cannot mix * digital and text modes. * * Returns: <eslOK> on success, and the alignment in <bootsample> is * set to be a bootstrap resample of the alignment in <msa>. * * Throws: <eslEINVAL> if <msa>,<bootsample> aren't in the same mode * (digital vs. text). */ int esl_msashuffle_Bootstrap(ESL_RANDOMNESS *r, ESL_MSA *msa, ESL_MSA *bootsample) { int i, pos, col; /* contract checks */ if ( (msa->flags & eslMSA_DIGITAL) && ! (bootsample->flags & eslMSA_DIGITAL)) ESL_EXCEPTION(eslEINVAL, "<msa> and <bootsample> must both be in digital or text mode"); if (! (msa->flags & eslMSA_DIGITAL) && (bootsample->flags & eslMSA_DIGITAL)) ESL_EXCEPTION(eslEINVAL, "<msa> and <bootsample> must both be in digital or text mode"); if (! (msa->flags & eslMSA_DIGITAL)) { for (pos = 0; pos < msa->alen; pos++) { col = esl_rnd_Roll(r, msa->alen); for (i = 0; i < msa->nseq; i++) bootsample->aseq[i][pos] = msa->aseq[i][col]; } for (i = 0; i < msa->nseq; i++) bootsample->aseq[i][msa->alen] = '\0'; } #ifdef eslAUGMENT_ALPHABET else { for (i = 0; i < msa->nseq; i++) bootsample->ax[i][0] = eslDSQ_SENTINEL; for (pos = 1; pos <= msa->alen; pos++) { col = esl_rnd_Roll(r, msa->alen) + 1; for (i = 0; i < msa->nseq; i++) bootsample->ax[i][pos] = msa->ax[i][col]; } for (i = 0; i < msa->nseq; i++) bootsample->ax[i][msa->alen+1] = eslDSQ_SENTINEL; } #endif /*eslAUGMENT_ALPHABET*/ return eslOK; }
/* Function: p7_GOATrace() * Synopsis: Optimal accuracy decoding: traceback. * Incept: SRE, Fri Feb 29 12:59:11 2008 [Janelia] * * Purpose: The traceback stage of the optimal accuracy decoding algorithm * \citep{Kall05}. * * Caller provides the OA DP matrix <gx> that was just * calculated by <p7_GOptimalAccuracy()>, as well as the * posterior decoding matrix <pp>, which was calculated by * Forward/Backward on a target sequence of length <L> * using the query model <gm>. * * Caller provides an empty traceback structure <tr> to * hold the result, allocated to hold optional posterior * probability annotation on residues (with * <p7_trace_CreateWithPP()>, generally). This will be * internally reallocated as needed for larger traces. * * Args: gm - query profile * pp - posterior decoding matrix created by <p7_PosteriorDecoding()> * gx - OA DP matrix calculated by <p7_OptimalAccuracyDP()> * tr - RESULT: OA traceback, allocated with posterior probs * * Returns: <eslOK> on success, and <tr> contains the OA traceback. * * Throws: <eslEMEM> on allocation error. */ int p7_GOATrace(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, P7_TRACE *tr) { int i = gx->L; /* position in seq (1..L) */ int k = 0; /* position in model (1..M) */ float postprob; int sprv, scur; int status; #ifdef p7_DEBUGGING if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace isn't empty: forgot to Reuse()?"); #endif if ((status = p7_trace_AppendWithPP(tr, p7T_T, k, i, 0.0)) != eslOK) return status; if ((status = p7_trace_AppendWithPP(tr, p7T_C, k, i, 0.0)) != eslOK) return status; sprv = p7T_C; while (sprv != p7T_S) { switch (sprv) { case p7T_M: scur = select_m(gm, gx, i, k); k--; i--; break; case p7T_D: scur = select_d(gm, gx, i, k); k--; break; case p7T_I: scur = select_i(gm, gx, i, k); i--; break; case p7T_N: scur = select_n(i); break; case p7T_C: scur = select_c(gm, pp, gx, i); break; case p7T_J: scur = select_j(gm, pp, gx, i); break; case p7T_E: scur = select_e(gm, gx, i, &k); break; case p7T_B: scur = select_b(gm, gx, i); break; default: ESL_EXCEPTION(eslEINVAL, "bogus state in traceback"); } if (scur == -1) ESL_EXCEPTION(eslEINVAL, "OA traceback choice failed"); postprob = get_postprob(pp, scur, sprv, k, i); if ((status = p7_trace_AppendWithPP(tr, scur, k, i, postprob)) != eslOK) return status; /* For NCJ, we had to defer i decrement. */ if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--; sprv = scur; } tr->M = gm->M; tr->L = gx->L; return p7_trace_Reverse(tr); }
/* Function: esl_workqueue_Dump() * Synopsis: Print the contents of the queues. * Incept: MSF, Thu Jun 18 11:51:39 2009 * * Purpose: Print the contents of the queues and their pointers. * * Returns: <eslOK> on success. */ int esl_workqueue_Dump(ESL_WORK_QUEUE *queue) { int i; if (queue == NULL) ESL_EXCEPTION(eslEINVAL, "Invalid queue object"); if (pthread_mutex_lock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex lock failed"); printf ("Reader head: %2d count: %2d\n", queue->readerQueueHead, queue->readerQueueCnt); printf ("Worker head: %2d count: %2d\n", queue->workerQueueHead, queue->workerQueueCnt); for (i = 0; i < queue->queueSize; ++i) { printf (" %2d: %p %p\n", i, queue->readerQueue[i], queue->workerQueue[i]); } printf ("Pending: %2d\n\n", queue->pendingWorkers); if (pthread_mutex_unlock (&queue->queueMutex) != 0) ESL_EXCEPTION(eslESYS, "mutex unlock failed"); return eslOK; }
int p7_sparsemask_StartRow_avx512(P7_SPARSEMASK *sm, int i) { #ifdef HAVE_AVX512 int r; int status; #ifdef p7_DEBUGGING if (i < 1 || i > sm->L) ESL_EXCEPTION(eslEINVAL, "i is 1..L: sequence position"); if (sm->last_i <= i) ESL_EXCEPTION(eslEINVAL, "rows need to be added in reverse order L..1"); #endif /* Make sure kmem has enough memory; if not, double it. * Because we know the original allocation was enough to hold * the slots, we know that doubling (even if ncells has filled * the current kalloc) is sufficient. */ if (sm->ncells_AVX_512 + p7_VNF_AVX_512*sm->Q_AVX_512 > sm->kalloc_AVX_512) { int64_t kalloc_req = sm->kalloc_AVX_512 * 2; ESL_REALLOC(sm->kmem_AVX_512, sizeof(int) * kalloc_req); sm->kalloc_AVX_512 = kalloc_req; sm->n_krealloc++; } for (r = 0; r < p7_VNF_AVX_512; r++) { sm->s_AVX_512[p7_VNF_AVX_512-r-1] = sm->kmem_AVX_512 + sm->ncells_AVX_512 + r*sm->Q_AVX_512; sm->sn_AVX_512[r] = 0; } sm->last_i_AVX_512 = i; for (r = 0; r < p7_VNF_AVX_512; r++) sm->last_k_AVX_512[r] = sm->M+1; /* sentinel to be sure that Add() is called in reverse order M..1 */ return eslOK; ERROR: return status; #endif //HAVE_AVX512 #ifndef HAVE_AVX512 return eslENORESULT; #endif }
/* Function: p7_StochasticTrace() * Synopsis: Sample a traceback from a Forward matrix * Incept: SRE, Fri Aug 8 17:40:18 2008 [UA217, IAD-SFO] * * Purpose: Perform a stochastic traceback from Forward matrix <ox>, * using random number generator <r>, in order to sample an * alignment of model <om> to digital sequence <dsq> of * length <L>. * * The sampled traceback is returned in <tr>, which the * caller provides with at least an initial allocation; * the <tr> allocation will be grown as needed here. * * Args: r - source of random numbers * dsq - digital sequence being aligned, 1..L * L - length of dsq * om - profile * ox - Forward matrix to trace, LxM * tr - storage for the recovered traceback * * Returns: <eslOK> on success * * Throws: <eslEMEM> on allocation error. * <eslEINVAL> on several types of problems, including: * the trace isn't empty (wasn't Reuse()'d); */ int p7_StochasticTrace(ESL_RANDOMNESS *rng, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *ox, P7_TRACE *tr) { int i; /* position in sequence 1..L */ int k; /* position in model 1..M */ int s0, s1; /* choice of a state */ int status; if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace not empty; needs to be Reuse()'d?"); i = L; k = 0; if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) return status; if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) return status; s0 = tr->st[tr->N-1]; while (s0 != p7T_S) { switch (s0) { case p7T_M: s1 = select_m(rng, om, ox, i, k); k--; i--; break; case p7T_D: s1 = select_d(rng, om, ox, i, k); k--; break; case p7T_I: s1 = select_i(rng, om, ox, i, k); i--; break; case p7T_N: s1 = select_n(i); break; case p7T_C: s1 = select_c(rng, om, ox, i); break; case p7T_J: s1 = select_j(rng, om, ox, i); break; case p7T_E: s1 = select_e(rng, om, ox, i, &k); break; case p7T_B: s1 = select_b(rng, om, ox, i); break; default: ESL_EXCEPTION(eslEINVAL, "bogus state in traceback"); } if (s1 == -1) ESL_EXCEPTION(eslEINVAL, "Stochastic traceback choice failed"); if ((status = p7_trace_Append(tr, s1, k, i)) != eslOK) return status; if ( (s1 == p7T_N || s1 == p7T_J || s1 == p7T_C) && s1 == s0) i--; s0 = s1; } /* end traceback, at S state */ tr->M = om->M; tr->L = L; return p7_trace_Reverse(tr); }