/* Function:  esl_recorder_ResizeTo()
 * Synopsis:  Reallocate an <ESL_RECORDER> for a new <maxlines>
 * Incept:    SRE, Fri Dec 25 17:02:46 2009 [Casa de Gatos]
 *
 * Purpose:   Reallocate the <ESL_RECORDER> <rc> to have a new
 *            window size <new_maxlines>.
 *
 *            The new size may be more or less than the previous
 *            window size for <rc>. If it equals the current
 *            allocation, nothing is done.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> if (re-)allocation fails. In the shrinkage
 *            case, the surplus line buffers have already been
 *            released at that point; their slots are reset to
 *            <NULL> so <rc> never holds a dangling pointer.
 *
 *            <eslEINVAL> if the recorder has a marked line (for start
 *            of a block) and you try to shrink it so much that that
 *            marked line would be lost, or if the shrink would lose
 *            the current line.
 *
 *            <eslEINCONCEIVABLE> on any baseline resetting problem;
 *            this would have to be an internal error in the module.
 *
 * Note:      We may have to repermute the line array, and reset its
 *            baseline, as follows.
 *
 *            In the growth case: if the line array is out of order
 *            (circularly permuted) we must straighten it out, which
 *            means resetting the baseline.
 *            i.e. to grow 3 1 2 to nalloc=6, we need 1 2 3 x x x;
 *            simple reallocation to 3 1 2 x x x doesn't work,
 *            next read would make 3 4 2 x x x.
 *
 *            In the shrinkage case: if the line array is in use beyond the
 *            new array size, we set a new baseline to keep as much of the
 *            old array as possible.
 *
 *            i.e. for 6->3
 *            1 2 3 x x x -> 1 2 3
 *            1 2 3 4 x x -> 2 3 4 with new baseline=2.
 *            4 5 0 1 2 3 -> 3 4 5 with new baseline=3
 */
int
esl_recorder_ResizeTo(ESL_RECORDER *rc, int new_maxlines)
{
  int   idx;
  int   newbase;
  void *tmp;
  int   minlines;
  int   status;

  if (new_maxlines == rc->nalloc) return eslOK;

  if (new_maxlines > rc->nalloc) /* growth case */
    {
      if ((rc->nread - rc->baseline) / rc->nalloc != 0) /* array is permuted; reorder it */
        {
          newbase = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
          status  = recorder_new_baseline(rc, newbase);
          if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
        }
    }
  else                           /* shrinkage case */
    {
      /* check that the marked line (if any) will stay in window */
      if (rc->markline >= 0)
        {
          minlines = rc->nread - rc->markline;
          if (new_maxlines < minlines)
            ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing marked line");
        }

      /* check that current line will stay in window */
      minlines = rc->nread - rc->ncurr + 1;
      if (new_maxlines < minlines)
        ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing current line");

      if (rc->nread - rc->baseline > new_maxlines) /* baseline needs to move up */
        {
          newbase = rc->nread - new_maxlines;
          status  = recorder_new_baseline(rc, newbase);
          if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
        }

      /* Release the line buffers that fall off the end of the window.
       * Each slot is reset after it is freed: if one of the
       * reallocations below fails, <rc> keeps its old <nalloc>, and
       * these slots must not be left pointing at freed memory.
       */
      for (idx = new_maxlines; idx < rc->nalloc; idx++)
        {
          if (rc->line[idx]) free(rc->line[idx]);
          rc->line[idx]   = NULL;
          rc->lalloc[idx] = 0;
        }
    }

  ESL_RALLOC(rc->line,   tmp, sizeof(char *) * new_maxlines);
  ESL_RALLOC(rc->lalloc, tmp, sizeof(int)    * new_maxlines);
  ESL_RALLOC(rc->offset, tmp, sizeof(off_t)  * new_maxlines);

  for (idx = rc->nalloc; idx < new_maxlines; idx++) /* no-op in shrinkage case */
    {
      rc->line[idx]   = NULL;
      rc->lalloc[idx] = 0;
      rc->offset[idx] = 0;
    }
  rc->nalloc = new_maxlines;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  esl_hmx_GrowTo()
 * Synopsis:  Assure that an <ESL_HMX> matrix is big enough.
 *
 * Purpose:   Assures that matrix <mx> has valid row pointers for
 *            rows <0..L>, each at least <M> cells wide; reallocates
 *            the cell memory and/or the row pointer arrays if
 *            necessary, and re-lays-out the row pointers.
 *
 *            Unless the matrix was already sufficient, <mx->M> and
 *            <mx->L> are reset to 0 and any data that were in <mx>
 *            must be assumed to be invalidated.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
esl_hmx_GrowTo(ESL_HMX *mx, int L, int M)
{
  uint64_t ncells;
  void    *p;
  int      do_reset = FALSE;
  int      i;
  int      status;

  /* The matrix is only usable up to <validR> rows, which can be fewer
   * than <allocR> after the rows have been widened in place; so test
   * against <validR>, not <allocR>.
   */
  if (L < mx->validR && M <= mx->allocM) return eslOK;

  /* Do we have to reallocate the 2D matrix, or can we get away with
   * rejiggering the row pointers into the existing memory?
   * Widen before multiplying, so the product cannot overflow <int>.
   */
  ncells = (uint64_t) (L+1) * (uint64_t) M;
  if (ncells > mx->ncells)
    {
      ESL_RALLOC(mx->dp_mem, p, sizeof(float) * ncells);
      mx->ncells = ncells;
      do_reset   = TRUE;
    }

  /* must we reallocate row pointers? */
  if (L >= mx->allocR)
    {
      ESL_RALLOC(mx->dp, p, sizeof(float *) * (L+1));
      ESL_RALLOC(mx->sc, p, sizeof(float)   * (L+2));
      mx->allocR = L+1;
      mx->allocM = M;
      do_reset   = TRUE;
    }

  /* must we widen the rows? */
  if (M > mx->allocM)
    {
      mx->allocM = M;
      do_reset   = TRUE;
    }

  /* must we set some more valid row pointers? */
  if (L >= mx->validR)
    do_reset = TRUE;

  /* did we trigger a relayout of row pointers? */
  if (do_reset)
    {
      mx->validR = ESL_MIN(mx->ncells / mx->allocM, mx->allocR);
      for (i = 0; i < mx->validR; i++)
        mx->dp[i] = mx->dp_mem + (uint64_t) i * (uint64_t) mx->allocM;
    }
  mx->M = 0;
  mx->L = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_gmx_GrowTo()
 * Synopsis:  Assure that DP matrix is big enough.
 *
 * Purpose:   Makes sure DP matrix <gx> can hold a model of size
 *            up to <M> against a sequence of length up to <L>,
 *            reallocating whatever is too small.
 *
 *            The configured <RAMLIMIT> is not consulted here; the
 *            function allocates whatever it is asked for.
 *
 * Returns:   <eslOK> on success. <gx> may have been reallocated, so
 *            any data it held must be assumed to be invalidated.
 *
 * Throws:    <eslEMEM> on allocation failure; any data that may
 *            have been in <gx> must be assumed to be invalidated.
 */
int
p7_gmx_GrowTo(P7_GMX *gx, int M, int L)
{
  uint64_t needed;              /* cells required for (M+1) x (L+1)   */
  void    *tmp;                 /* scratch pointer for ESL_RALLOC     */
  int      relayout = FALSE;    /* TRUE if row pointers must be reset */
  int      row;
  int      status;

  /* Wide enough and enough valid rows already: nothing to do. */
  if (M < gx->allocW && L < gx->validR) return eslOK;

  /* Cell memory: grow only if the total cell count is too small.
   * Growing in one dimension while shrinking in the other may be
   * satisfied purely by re-pointing rows into the existing memory.
   */
  needed = (uint64_t) (M+1) * (uint64_t) (L+1);
  if (needed > gx->ncells)
    {
      ESL_RALLOC(gx->dp_mem, tmp, sizeof(float) * needed * p7G_NSCELLS);
      gx->ncells = needed;
      relayout   = TRUE;
    }

  /* Per-row arrays: the special-state cells and the row pointers.
   * (allocW is updated further down, in the relayout step.)
   */
  if (L >= gx->allocR)
    {
      ESL_RALLOC(gx->xmx, tmp, sizeof(float)   * (L+1) * p7G_NXCELLS);
      ESL_RALLOC(gx->dp,  tmp, sizeof(float *) * (L+1));
      gx->allocR = L+1;
      relayout   = TRUE;
    }

  /* Wider rows, or more valid rows, also force a relayout. */
  if (M >= gx->allocW || L >= gx->validR) relayout = TRUE;

  /* Resize the rows and re-point every valid row into dp_mem. */
  if (relayout)
    {
      gx->allocW = M+1;
      gx->validR = ESL_MIN(gx->ncells / gx->allocW, gx->allocR);
      for (row = 0; row < gx->validR; row++)
        gx->dp[row] = gx->dp_mem + row * (gx->allocW) * p7G_NSCELLS;
    }

  gx->M = 0;
  gx->L = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_tophits_Grow()
 * Synopsis:  Reallocates a larger hit list, if needed.
 *
 * Purpose:   If list <h> cannot hold another hit, doubles
 *            the internal allocation.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure; in this case,
 *            the data in <h> are unchanged.
 */
int
p7_tophits_Grow(P7_TOPHITS *h)
{
  void   *p;
  P7_HIT *ori    = h->unsrt;         /* where the hits lived before growing */
  int     Nalloc = h->Nalloc * 2;    /* grow by doubling */
  int     i;
  int     status;

  if (h->N < h->Nalloc) return eslOK; /* we have enough room for another hit */

  /* Grow the pointer array first. If this fails, nothing in <h> has
   * been touched. If it were done after <h->unsrt> had already been
   * moved, a failure here would leave every pointer in a sorted
   * <h->hit> aimed at freed memory.
   */
  ESL_RALLOC(h->hit, p, sizeof(P7_HIT *) * Nalloc);

  /* NOTE(review): presumably p7_hit_Grow() leaves <h->unsrt> untouched
   * when it fails, as a realloc-based grower would - confirm.
   */
  if (( status = p7_hit_Grow( &(h->unsrt), h->Nalloc, Nalloc)) != eslOK) goto ERROR;

  /* If we grow a sorted list, we have to translate the pointers
   * in h->hit, because h->unsrt might have just moved in memory.
   */
  if (h->is_sorted_by_seqidx || h->is_sorted_by_sortkey)
    {
      for (i = 0; i < h->N; i++)
        h->hit[i] = h->unsrt + (h->hit[i] - ori);
    }

  h->Nalloc = Nalloc;
  return eslOK;

 ERROR:
  return eslEMEM;
}
/* Function:  p7_oprofile_MPIRecv()
 * Synopsis:  Receives an OPROFILE as a work unit from an MPI sender.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Receive a work unit that consists of a single OPROFILE
 *            sent by MPI <source> (<0..nproc-1>, or
 *            <MPI_ANY_SOURCE>) tagged as <tag> for MPI communicator <comm>.
 *
 *            Work units are prefixed by a status code. If the unit's
 *            code is <eslOK> and no errors are encountered, this
 *            routine will return <eslOK> and a non-<NULL> <*ret_om>.
 *            If the unit's code is <eslEOD> (a shutdown signal),
 *            this routine returns <eslEOD> and <*ret_om> is <NULL>.
 *
 *            Caller provides a working buffer <*buf> of size
 *            <*nalloc> characters. These are passed by reference, so
 *            that <*buf> can be reallocated and <*nalloc> increased
 *            if necessary. As a special case, if <*buf> is <NULL> and
 *            <*nalloc> is 0, the buffer will be allocated
 *            appropriately, but the caller is still responsible for
 *            free'ing it.
 *
 *            Caller may or may not already know what alphabet the OPROFILE
 *            is expected to be in.  A reference to the current
 *            alphabet is passed in <abc>. If the alphabet is unknown,
 *            pass <*abc = NULL>, and when the OPROFILE is received, an
 *            appropriate new alphabet object is allocated and passed
 *            back to the caller via <*abc>.  If the alphabet is
 *            already known, <*abc> is that alphabet, and the new
 *            OPROFILE's alphabet type is verified to agree with it. This
 *            mechanism allows an application to let the first OPROFILE
 *            determine the alphabet type for the application, while
 *            still keeping the alphabet under the application's scope
 *            of control.
 *
 * Returns:   <eslOK> on success. <*ret_om> contains the received OPROFILE;
 *            it is allocated here, and the caller is responsible for
 *            free'ing it.  <*buf> may have been reallocated to a
 *            larger size, and <*nalloc> may have been increased.  If
 *            <*abc> was passed as <NULL>, it now points to an
 *            <ESL_ALPHABET> object that was allocated here; caller is
 *            responsible for free'ing this.
 *
 *            Returns <eslEOD> if an end-of-data signal was received.
 *            In this case, <*abc> is left unchanged and <*ret_om> is
 *            <NULL>; <*buf> and <*nalloc> may have been changed,
 *            because the message is received into <*buf> before its
 *            status code can be examined.
 *
 *            Returns <eslEINCOMPAT> if the OPROFILE is in a different alphabet
 *            than <*abc> said to expect. In this case, <*abc> is unchanged,
 *            <*buf> and <*nalloc> may have been changed, and <*ret_om> is
 *            <NULL>.
 *
 * Throws:    <eslEMEM> on allocation error, and <eslESYS> if an MPI
 *            call fails; in either case <*ret_om> is <NULL>.
 */
int
p7_oprofile_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **abc, P7_OPROFILE **ret_om)
{
  int         status;
  int         code;
  int         n;
  int         pos;
  MPI_Status  mpistatus;

  /* Probe first, because we need to know if our buffer is big enough. */
  if (MPI_Probe(source, tag, comm, &mpistatus)  != 0) ESL_XEXCEPTION(eslESYS, "mpi probe failed");
  if (MPI_Get_count(&mpistatus, MPI_PACKED, &n) != 0) ESL_XEXCEPTION(eslESYS, "mpi get count failed");

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc)
    {
      void *tmp;
      ESL_RALLOC(*buf, tmp, sizeof(char) * n);
      *nalloc = n;
    }

  /* Receive the packed work unit */
  if (MPI_Recv(*buf, n, MPI_PACKED, source, tag, comm, &mpistatus) != 0) ESL_XEXCEPTION(eslESYS, "mpi recv failed");

  /* Unpack it, looking at the status code prefix for EOD/EOK  */
  pos = 0;
  if (MPI_Unpack(*buf, n, &pos, &code, 1, MPI_INT, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi unpack failed");
  if (code == eslEOD)  { *ret_om = NULL;  return eslEOD; }

  return p7_oprofile_MPIUnpack(*buf, *nalloc, &pos, comm, abc, ret_om);

 ERROR:
  /* Nothing was allocated here; just honor the documented contract. */
  *ret_om = NULL;
  return status;
}
/* Function:  esl_hmm_Emit()
 * Synopsis:  Emit a sequence from an HMM.
 *
 * Purpose:   Sample one sequence from an <hmm>, using random
 *            number generator <r>. Optionally return the sequence,
 *            the state path, and/or the length via <opt_dsq>,
 *            <opt_path>, and <opt_L>.
 *
 *            The returned <dsq> and <path> are indexed <1..L>.
 *            <dsq[0]> and <dsq[L+1]> are <eslDSQ_SENTINEL>;
 *            <path[0]> is -1 and <path[L+1]> is <hmm->M>, the
 *            implicit end state.
 *
 *            If <opt_dsq> or <opt_path> are requested, caller
 *            becomes responsible for free'ing their memory.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure. In this case nothing
 *            is handed to the caller: any requested <*opt_dsq> and
 *            <*opt_path> are set to <NULL>, and <*opt_L> to 0.
 */
int
esl_hmm_Emit(ESL_RANDOMNESS *r, const ESL_HMM *hmm, ESL_DSQ **opt_dsq, int **opt_path, int *opt_L)
{
  int      k, L;
  int      allocL = 256;        /* initial allocation; doubled as needed */
  int     *path   = NULL;
  ESL_DSQ *dsq    = NULL;
  void    *tmp    = NULL;
  int      status;

  ESL_ALLOC(dsq,  sizeof(ESL_DSQ) * allocL);
  ESL_ALLOC(path, sizeof(int)     * allocL);

  dsq[0]  = eslDSQ_SENTINEL;
  path[0] = -1;

  k = esl_rnd_FChoose(r, hmm->pi, hmm->M+1);
  L = 0;
  while (k != hmm->M)           /* M is the implicit end state */
    {
      L++;
      /* Keep room for position L and for the sentinel at L+1. */
      if (L >= allocL-1)
        {
          ESL_RALLOC(dsq,  tmp, sizeof(ESL_DSQ) * (allocL*2));
          ESL_RALLOC(path, tmp, sizeof(int)     * (allocL*2));
          allocL *= 2;
        }

      path[L] = k;
      dsq[L]  = esl_rnd_FChoose(r, hmm->e[k], hmm->abc->K);
      k       = esl_rnd_FChoose(r, hmm->t[k], hmm->M+1);
    }

  path[L+1] = hmm->M;           /* sentinel for "end state" */
  dsq[L+1]  = eslDSQ_SENTINEL;

  if (opt_dsq  != NULL) *opt_dsq  = dsq;   else free(dsq);
  if (opt_path != NULL) *opt_path = path;  else free(path);
  if (opt_L    != NULL) *opt_L    = L;
  return eslOK;

 ERROR:
  free(path);
  free(dsq);
  /* Do not leave the caller's optional outputs indeterminate. */
  if (opt_dsq  != NULL) *opt_dsq  = NULL;
  if (opt_path != NULL) *opt_path = NULL;
  if (opt_L    != NULL) *opt_L    = 0;
  return status;
}
/* Function:  p7_oprofile_MPISend()
 * Synopsis:  Send an OPROFILE as an MPI work unit.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Sends an OPROFILE <om> as a work unit to MPI process
 *            <dest> (where <dest> ranges from 0..<nproc-1>), tagged
 *            with MPI tag <tag>, for MPI communicator <comm>, as
 *            the sole workunit or result.
 *
 *            Work units are prefixed by a status code. If <om> is
 *            <non-NULL>, the work unit is an <eslOK> code followed by
 *            the packed OPROFILE. If <om> is NULL, the work unit is an
 *            <eslEOD> code, which <p7_oprofile_MPIRecv()> knows how to
 *            interpret; this is typically used for an end-of-data
 *            signal to cleanly shut down worker processes.
 *
 *            In order to minimize alloc/free cycles in this routine,
 *            caller passes a pointer to a working buffer <*buf> of
 *            size <*nalloc> characters. If necessary (i.e. if <om> is
 *            too big to fit), <*buf> will be reallocated and <*nalloc>
 *            increased to the new size. As a special case, if <*buf>
 *            is <NULL> and <*nalloc> is 0, the buffer will be
 *            allocated appropriately, but the caller is still
 *            responsible for free'ing it.
 *
 * Returns:   <eslOK> on success; <*buf> may have been reallocated and
 *            <*nalloc> may have been increased.
 *
 * Throws:    <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc
 *            fails. In either case, <*buf> and <*nalloc> remain valid and useful
 *            memory (though the contents of <*buf> are undefined).
 *
 * Note:      Compare to p7_hmmfile_WriteBinary(). The two operations (sending
 *            an HMM via MPI, or saving it as a binary file to disk) are
 *            similar.
 */
int
p7_oprofile_MPISend(P7_OPROFILE *om, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc)
{
  int   status;
  int   code;
  int   sz, n, pos;

  /* Figure out size: an upper bound on the packed status code, plus the profile */
  if (MPI_Pack_size(1, MPI_INT, comm, &n) != 0) ESL_XEXCEPTION(eslESYS, "mpi pack size failed");
  if (om != NULL)
    {
      if ((status = p7_oprofile_MPIPackSize(om, comm, &sz)) != eslOK) return status;
      n += sz;
    }

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc)
    {
      void *tmp;
      ESL_RALLOC(*buf, tmp, sizeof(char) * n);
      *nalloc = n;
    }

  /* Pack the status code and OPROFILE into the buffer */
  pos  = 0;
  code = (om == NULL) ? eslEOD : eslOK;
  if (MPI_Pack(&code, 1, MPI_INT, *buf, n, &pos, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi pack failed");
  if (om != NULL)
    {
      if ((status = p7_oprofile_MPIPack(om, *buf, n, &pos, comm)) != eslOK) return status;
    }

  /* Send the packed OPROFILE to the destination. <n> is only an upper
   * bound from MPI_Pack_size(); <pos> is the number of bytes actually
   * packed, so send exactly that many rather than trailing bytes that
   * were never written.
   */
  if (MPI_Send(*buf, pos, MPI_PACKED, dest, tag, comm) != 0) ESL_XEXCEPTION(eslESYS, "mpi send failed");
  return eslOK;

 ERROR:
  return status;
}
/* Function: GrowCP9Matrix()
 *
 * Purpose:  Reallocate a CP9 dp matrix, if necessary, for seq for
 *           length N, or 2 rows (if we're scanning in memory
 *           efficient mode, in this case N == 1, nrows = N+1).
 *
 *           Note: unlike HMMER, M never changes, so we only have
 *           to worry about increasing the number of rows if nec.
 *
 *           Returns individual ptrs to the five matrix components
 *           (mmx, imx, dmx, elmx, erow) as a convenience.
 *
 *           This function allocates the requested matrix regardless
 *           of its size.
 *
 *           If kmin and kmax are both non-NULL, the matrix will be a
 *           p7 HMM banded matrix as defined by bands in kmin, kmax.
 *           In this case N must be length of the sequence. If caller
 *           wants a non-banded CP9 matrix, pass kmin = kmax = NULL.
 *
 * Args:     mx     - an already allocated matrix to grow.
 *           errbuf - buffer for an error message, filled on failure
 *           N      - seq length to allocate for; N+1 rows
 *           M      - size of model, contract enforces this must == mx->M
 *           kmin   - OPTIONAL: [0.1..i..N] minimum k for residue i
 *           kmax   - OPTIONAL: [0.1..i..N] maximum k for residue i
 *           mmx, imx, dmx, elmx, erow
 *                  - RETURN: ptrs to mx components as a convenience;
 *                    any of them may be passed as NULL
 *
 * Return:   eslOK on success, eslEINCOMPAT if contract is violated,
 *           eslEMEM (via the ERROR path) if a reallocation fails.
 *           mx is (re)allocated here.
 */
int
GrowCP9Matrix(CP9_MX *mx, char *errbuf, int N, int M, int *kmin, int *kmax, int ***mmx, int ***imx, int ***dmx, int ***elmx, int **erow)
{
  int   status;
  void *p;
  int   i;
  int   ncells_needed = 0;
  int   do_banded;
  int   cur_ncells    = 0;
  int   do_reallocate;

  if(mx->M != M) ESL_FAIL(eslEINCOMPAT, errbuf, "GrowCP9Matrix(), mx->M: %d != M passed in: %d\n", mx->M, M);
  if(N < 0)      ESL_FAIL(eslEINCOMPAT, errbuf, "GrowCP9Matrix(), N: %d < 0\n", N);

  /* Banded only when BOTH band arrays are supplied; both are
   * dereferenced below whenever do_banded is TRUE.
   */
  do_banded = (kmin != NULL && kmax != NULL) ? TRUE : FALSE;

  if(do_banded) {
    for (i = 0; i <= N; i++) ncells_needed += (kmax[i] - kmin[i] + 1);
  }
  else {
    ncells_needed = (N+1) * (M+1);
  }

  /* NOTE(review): the row-pointer arrays and erow are resized only when
   * the cell count grows. In banded mode N can grow while the cell count
   * does not - confirm callers cannot outrun the row arrays that way.
   */
  do_reallocate = (ncells_needed <= mx->ncells_allocated) ? FALSE : TRUE;
  if(do_reallocate) {
    /* we need more space */
    ESL_RALLOC(mx->mmx,      p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->imx,      p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->dmx,      p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->elmx,     p, sizeof(int *) * (N+1));
    ESL_RALLOC(mx->erow,     p, sizeof(int)   * (N+1));
    ESL_RALLOC(mx->mmx_mem,  p, sizeof(int)   * ncells_needed);
    ESL_RALLOC(mx->imx_mem,  p, sizeof(int)   * ncells_needed);
    ESL_RALLOC(mx->dmx_mem,  p, sizeof(int)   * ncells_needed);
    ESL_RALLOC(mx->elmx_mem, p, sizeof(int)   * ncells_needed);
    mx->ncells_allocated = ncells_needed;

    /* update size */
    mx->size_Mb  = (float) sizeof(CP9_MX);
    mx->size_Mb += (float) (sizeof(int *) * (N+1) * 4);           /* mx->*mx ptrs */
    mx->size_Mb += (float) (sizeof(int)   * (ncells_needed * 4)); /* mx->*mx_mem  */
    mx->size_Mb += (float) (sizeof(int)   * (N+1));               /* mx->erow     */
    mx->size_Mb /= 1000000.;
  }

  if(do_banded || do_reallocate) {
    /* rearrange pointers: row 0 starts at the base of each cell block */
    mx->mmx[0]  = mx->mmx_mem;
    mx->imx[0]  = mx->imx_mem;
    mx->dmx[0]  = mx->dmx_mem;
    mx->elmx[0] = mx->elmx_mem;

    if(do_banded) {
      /* each row i starts right after the band cells of rows 0..i-1 */
      cur_ncells = kmax[0] - kmin[0] + 1;
      for (i = 1; i <= N; i++) {
        mx->mmx[i]  = mx->mmx[0]  + cur_ncells;
        mx->imx[i]  = mx->imx[0]  + cur_ncells;
        mx->dmx[i]  = mx->dmx[0]  + cur_ncells;
        mx->elmx[i] = mx->elmx[0] + cur_ncells;
        cur_ncells += kmax[i] - kmin[i] + 1;
      }
    }
    else { /* non-banded, we only get here if we reallocated */
      for (i = 1; i <= N; i++) {
        mx->mmx[i]  = mx->mmx[0]  + (i*(M+1));
        mx->imx[i]  = mx->imx[0]  + (i*(M+1));
        mx->dmx[i]  = mx->dmx[0]  + (i*(M+1));
        mx->elmx[i] = mx->elmx[0] + (i*(M+1));
      }
    }
  }

  mx->rows         = N;
  mx->kmin         = kmin; /* could be NULL */
  mx->kmax         = kmax; /* could be NULL */
  mx->ncells_valid = ncells_needed;

  if (mmx  != NULL) *mmx  = mx->mmx;
  if (imx  != NULL) *imx  = mx->imx;
  if (dmx  != NULL) *dmx  = mx->dmx;
  if (elmx != NULL) *elmx = mx->elmx;
  if (erow != NULL) *erow = mx->erow;

  return eslOK;

 ERROR:
  ESL_FAIL(status, errbuf, ("GrowCP9Matrix(), memory reallocation error."));
}
/* Function:  p7_omx_GrowTo()
 * Synopsis:  Assure that a DP matrix is big enough.
 * Incept:    SRE, Thu Dec 20 09:27:07 2007 [Janelia]
 *
 * Purpose:   Assures that an optimized DP matrix <ox> is allocated for
 *            a model up to <allocM> in length, with valid row
 *            pointers for rows <0..allocL> and X (special state)
 *            storage for rows <0..allocXL>; if not, reallocate to
 *            make it so.
 *
 * Returns:   <eslOK> on success, and <ox> may be reallocated upon
 *            return; any data that may have been in <ox> must be
 *            assumed to be invalidated.
 *
 * Throws:    <eslEMEM> on allocation failure, and any data that may
 *            have been in <ox> must be assumed to be invalidated.
 */
int
p7_omx_GrowTo(P7_OMX *ox, int allocM, int allocL, int allocXL)
{
  void  *p;
  int    nqf  = p7O_NQF(allocM);   /* row stride, in vectors, used for the float  (dpf) rows */
  int    nqw  = p7O_NQW(allocM);   /* row stride, in vectors, used for the short  (dpw) rows */
  int    nqb  = p7O_NQB(allocM);   /* row stride, in vectors, used for the uchar  (dpb) rows */
  size_t ncells;
  int    reset_row_pointers = FALSE;
  int    i;
  int    status;

  /* Widen before multiplying, so a large matrix cannot overflow <int>. */
  ncells = (size_t) (allocL+1) * (size_t) nqf * 4;

  /* If all possible dimensions are already satisfied, the matrix is fine */
  if (ox->allocQ4*4 >= allocM && ox->validR > allocL && ox->allocXR >= allocXL+1) return eslOK;

  /* If the main matrix is too small in cells, reallocate it;
   * and we'll need to realign/reset the row pointers later.
   * The extra 15 bytes leave room to round the base up to a
   * 16-byte boundary.
   */
  if (ncells > ox->ncells)
    {
      ESL_RALLOC(ox->dp_mem, p, sizeof(vector float) * (allocL+1) * nqf * p7X_NSCELLS + 15);
      ox->ncells = ncells;
      reset_row_pointers = TRUE;
    }

  /* If the X beams are too small, reallocate them. */
  if (allocXL+1 >= ox->allocXR)
    {
      ESL_RALLOC(ox->x_mem, p, sizeof(float) * (allocXL+1) * p7X_NXCELLS + 15);
      ox->allocXR = allocXL+1;
      ox->xmx     = (float *) ((unsigned long int) ((char *) ox->x_mem + 15) & (~0xf));
    }

  /* If there aren't enough rows, reallocate the row pointers; we'll
   * realign and reset them later.
   */
  if (allocL >= ox->allocR)
    {
      ESL_RALLOC(ox->dpb, p, sizeof(vector unsigned char *) * (allocL+1));
      ESL_RALLOC(ox->dpw, p, sizeof(vector signed short * ) * (allocL+1));
      ESL_RALLOC(ox->dpf, p, sizeof(vector float *)         * (allocL+1));
      ox->allocR         = allocL+1;
      reset_row_pointers = TRUE;
    }

  /* must we widen the rows? */
  if (allocM > ox->allocQ4*4)
    reset_row_pointers = TRUE;

  /* must we set some more valid row pointers? */
  if (allocL >= ox->validR)
    reset_row_pointers = TRUE;

  /* now reset the row pointers, if needed */
  if (reset_row_pointers)
    {
      /* all three views share one 16-byte-aligned base inside dp_mem */
      ox->dpb[0] = (vector unsigned char *) ((unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf));
      ox->dpw[0] = (vector signed short *) ox->dpb[0];
      ox->dpf[0] = (vector float *) ox->dpb[0];

      ox->validR = ESL_MIN( ox->ncells / (nqf * 4), ox->allocR);
      for (i = 1; i < ox->validR; i++)
        {
          ox->dpb[i] = ox->dpb[0] + i * nqb;
          ox->dpw[i] = ox->dpw[0] + i * nqw * p7X_NSCELLS;
          ox->dpf[i] = ox->dpf[0] + i * nqf * p7X_NSCELLS;
        }

      ox->allocQ4  = nqf;
      ox->allocQ8  = nqw;
      ox->allocQ16 = nqb;
    }

  ox->M = 0;
  ox->L = 0;
  return eslOK;

 ERROR:
  return status;
}
static void mpi_worker(const ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; int status; int type; P7_BUILDER *bld = NULL; ESL_MSA *msa = NULL; ESL_MSA *postmsa = NULL; ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; char *wbuf = NULL; /* packed send/recv buffer */ void *tmp; /* for reallocation of wbuf */ int wn = 0; /* allocation size for wbuf */ int sz, n; /* size of a packed message */ int pos; char errmsg[eslERRBUFSIZE]; /* After master initialization: master broadcasts its status. */ MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus != eslOK) return; /* master saw an error code; workers do an immediate normal shutdown. */ ESL_DPRINTF2(("worker %d: sees that master has initialized\n", cfg->my_rank)); /* Master now broadcasts worker initialization information (alphabet type) * Workers returns their status post-initialization. * Initial allocation of wbuf must be large enough to guarantee that * we can pack an error result into it, because after initialization, * errors will be returned as packed (code, errmsg) messages. */ MPI_Bcast(&type, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus == eslOK) { if ((cfg->abc = esl_alphabet_Create(type)) == NULL) xstatus = eslEMEM; } if (xstatus == eslOK) { wn = 4096; if ((wbuf = malloc(wn * sizeof(char))) == NULL) xstatus = eslEMEM; } if (xstatus == eslOK) { if ((bld = p7_builder_Create(go, cfg->abc)) == NULL) xstatus = eslEMEM; } MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); /* everyone sends xstatus back to master */ if (xstatus != eslOK) { if (wbuf != NULL) free(wbuf); if (bld != NULL) p7_builder_Destroy(bld); return; /* shutdown; we passed the error back for the master to deal with. 
*/ } bg = p7_bg_Create(cfg->abc); ESL_DPRINTF2(("worker %d: initialized\n", cfg->my_rank)); /* source = 0 (master); tag = 0 */ while (esl_msa_MPIRecv(0, 0, MPI_COMM_WORLD, cfg->abc, &wbuf, &wn, &msa) == eslOK) { /* Build the HMM */ ESL_DPRINTF2(("worker %d: has received MSA %s (%d columns, %d seqs)\n", cfg->my_rank, msa->name, msa->alen, msa->nseq)); if ((status = p7_Builder(bld, msa, bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) { strcpy(errmsg, bld->errbuf); goto ERROR; } ESL_DPRINTF2(("worker %d: has produced an HMM %s\n", cfg->my_rank, hmm->name)); /* Calculate upper bound on size of sending status, HMM, and optional postmsa; make sure wbuf can hold it. */ n = 0; if (MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &sz) != 0) goto ERROR; n += sz; if (p7_hmm_MPIPackSize( hmm, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR; n += sz; if (esl_msa_MPIPackSize(postmsa, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR; n += sz; if (n > wn) { ESL_RALLOC(wbuf, tmp, sizeof(char) * n); wn = n; } ESL_DPRINTF2(("worker %d: has calculated that HMM will pack into %d bytes\n", cfg->my_rank, n)); /* Send status, HMM, and optional postmsa back to the master */ pos = 0; if (MPI_Pack (&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD) != 0) goto ERROR; if (p7_hmm_MPIPack (hmm, wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR; if (esl_msa_MPIPack(postmsa, wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR; MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); ESL_DPRINTF2(("worker %d: has sent HMM to master in message of %d bytes\n", cfg->my_rank, pos)); esl_msa_Destroy(msa); msa = NULL; esl_msa_Destroy(postmsa); postmsa = NULL; p7_hmm_Destroy(hmm); hmm = NULL; } if (wbuf != NULL) free(wbuf); p7_builder_Destroy(bld); return; ERROR: ESL_DPRINTF2(("worker %d: fails, is sending an error message, as follows:\n%s\n", cfg->my_rank, errmsg)); pos = 0; MPI_Pack(&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Pack(errmsg, eslERRBUFSIZE, MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD); 
MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); if (wbuf != NULL) free(wbuf); if (msa != NULL) esl_msa_Destroy(msa); if (hmm != NULL) p7_hmm_Destroy(hmm); if (bld != NULL) p7_builder_Destroy(bld); return; }
/* Function: main() * Synopsis: Run set of queries against an FM * Purpose: Read in a FM and a file of query sequences. * For each query, find matching FM interval, then collect positions in * the original text T for the corresponding occurrences. These positions * are 0-based (so first character is position 0). */ int main(int argc, char *argv[]) { void* tmp; // used for RALLOC calls clock_t t1, t2; struct tms ts1, ts2; char *fname_fm = NULL; char *fname_queries = NULL; FM_HIT *hits = NULL; char *line = NULL; int status = eslOK; int hit_cnt = 0; int hit_indiv_cnt = 0; int miss_cnt = 0; int hit_num = 0; int hit_num2 = 0; int hits_size = 0; int i,j; int count_only = 0; FM_INTERVAL interval; FM_DATA *fmsf = NULL; FM_DATA *fmsb = NULL; FILE* fp_fm = NULL; FILE* fp = NULL; FILE* out = NULL; char *outname = NULL; ESL_GETOPTS *go = NULL; /* command line processing */ FM_CFG *cfg; FM_METADATA *meta; ESL_SQ *tmpseq; // used for sequence validation ESL_ALPHABET *abc = NULL; //start timer t1 = times(&ts1); process_commandline(argc, argv, &go, &fname_fm, &fname_queries); if (esl_opt_IsOn(go, "--out")) { outname = esl_opt_GetString(go, "--out"); if ( esl_strcmp ("-", outname) == 0 ) { out = stdout; outname = "stdout"; } else { out = fopen(outname,"w"); } } if (esl_opt_IsOn(go, "--count_only")) count_only = 1; if((fp_fm = fopen(fname_fm, "rb")) == NULL) esl_fatal("Cannot open file `%s': ", fname_fm); fm_configAlloc(&cfg); cfg->occCallCnt = 0; meta = cfg->meta; meta->fp = fp_fm; fm_readFMmeta( meta); if (meta->alph_type == fm_DNA) abc = esl_alphabet_Create(eslDNA); else if (meta->alph_type == fm_AMINO) abc = esl_alphabet_Create(eslAMINO); tmpseq = esl_sq_CreateDigital(abc); //read in FM-index blocks ESL_ALLOC(fmsf, meta->block_count * sizeof(FM_DATA) ); if (!meta->fwd_only) ESL_ALLOC(fmsb, meta->block_count * sizeof(FM_DATA) ); for (i=0; i<meta->block_count; i++) { fm_FM_read( fmsf+i,meta, TRUE ); if (!meta->fwd_only) { fm_FM_read(fmsb+i, meta, FALSE ); fmsb[i].SA = fmsf[i].SA; 
fmsb[i].T = fmsf[i].T; } } fclose(fp_fm); output_header(meta, stdout, go, fname_fm, fname_queries); /* initialize a few global variables, then call initGlobals * to do architecture-specific initialization */ fm_configInit(cfg, NULL); fm_alphabetCreate(meta, NULL); // don't override charBits fp = fopen(fname_queries,"r"); if (fp == NULL) esl_fatal("Unable to open file %s\n", fname_queries); ESL_ALLOC(line, FM_MAX_LINE * sizeof(char)); hits_size = 200; ESL_ALLOC(hits, hits_size * sizeof(FM_HIT)); while(fgets(line, FM_MAX_LINE, fp) ) { int qlen=0; while (line[qlen] != '\0' && line[qlen] != '\n') qlen++; if (line[qlen] == '\n') line[qlen] = '\0'; hit_num = 0; for (i=0; i<meta->block_count; i++) { fm_getSARangeReverse(fmsf+i, cfg, line, meta->inv_alph, &interval); if (interval.lower>=0 && interval.lower <= interval.upper) { int new_hit_num = interval.upper - interval.lower + 1; hit_num += new_hit_num; if (!count_only) { if (hit_num > hits_size) { hits_size = 2*hit_num; ESL_RALLOC(hits, tmp, hits_size * sizeof(FM_HIT)); } getFMHits(fmsf+i, cfg, &interval, i, hit_num-new_hit_num, qlen, hits, fm_forward); } } /* find reverse hits, using backward search on the forward FM*/ if (!meta->fwd_only) { fm_getSARangeForward(fmsb+i, cfg, line, meta->inv_alph, &interval);// yes, use the backward fm to produce the equivalent of a forward search on the forward fm if (interval.lower>=0 && interval.lower <= interval.upper) { int new_hit_num = interval.upper - interval.lower + 1; hit_num += new_hit_num; if (!count_only) { if (hit_num > hits_size) { hits_size = 2*hit_num; ESL_RALLOC(hits, tmp, hits_size * sizeof(FM_HIT)); } //even though I used fmsb above, use fmsf here, since we'll now do a backward trace //in the FM-index to find the next sampled SA position getFMHits(fmsf+i, cfg, &interval, i, hit_num-new_hit_num, qlen, hits, fm_backward); } } } } if (hit_num > 0) { if (count_only) { hit_cnt++; hit_indiv_cnt += hit_num; } else { hit_num2 = 0; //for each hit, identify the sequence id and 
position within that sequence for (i = 0; i< hit_num; i++) { status = fm_getOriginalPosition (fmsf, meta, hits[i].block, hits[i].length, fm_forward, hits[i].start, &(hits[i].block), &(hits[i].start) ); hits[i].sortkey = (status==eslERANGE ? -1 : meta->seq_data[ hits[i].block ].target_id); //validate match - if any characters in orig sequence were ambiguities, reject fm_convertRange2DSQ( fmsf, meta, hits[i].start, hits[i].length, p7_NOCOMPLEMENT, tmpseq, TRUE ); for (j=1; j<=hits[i].length; j++) { if (tmpseq->dsq[j] >= abc->K) { hits[i].sortkey = -1; //reject j = hits[i].length+1; //quit looking } } if (hits[i].sortkey != -1) hit_num2++; // legitimate hit } if (hit_num2 > 0) hit_cnt++; //now sort according the the sequence_id corresponding to that seq_offset qsort(hits, hit_num, sizeof(FM_HIT), hit_sorter); //skim past the skipped entries i = 0; while ( i < hit_num ) { if (hits[i].sortkey != -1 ) break; // i++; } if (i < hit_num) { if (out != NULL) { fprintf (out, "%s\n",line); //fprintf (out, "\t%10s (%8d %s)\n",meta->seq_data[ hits[i].block ].name, hits[i].start, (hits[i].direction==fm_forward?"+":"-")); fprintf (out, " %8ld %s %10s\n", (long)(hits[i].start), (hits[i].direction==fm_forward?"f":"r"), meta->seq_data[ hits[i].block ].name); } hit_indiv_cnt++; i++; // skip the first one, since I'll be comparing each to the previous for ( ; i< hit_num; i++) { if ( //meta->seq_data[ hits[i].block ].id != meta->seq_data[ hits[i-1].block ].id || hits[i].sortkey != hits[i-1].sortkey || //sortkey is seq_data[].id hits[i].direction != hits[i-1].direction || hits[i].start != hits[i-1].start ) { if (out != NULL) //fprintf (out, "\t%10s (%8d %s)\n",meta->seq_data[ hits[i].block ].name, hits[i].start, (hits[i].direction==fm_forward?"+":"-")); fprintf (out, " %8ld %s %10s\n", (long)(hits[i].start), (hits[i].direction==fm_forward?"f":"r"), meta->seq_data[ hits[i].block ].name); hit_indiv_cnt++; } } if (out != NULL) fprintf (out, "\n"); } } } else { miss_cnt++; } } for (i=0; 
i<meta->block_count; i++) { fm_FM_destroy( fmsf+i, 1 ); if (!meta->fwd_only) fm_FM_destroy( fmsb+i, 0 ); } free (hits); free (line); fclose(fp); fm_configDestroy(cfg); // compute and print the elapsed time in millisec t2 = times(&ts2); { double clk_ticks = sysconf(_SC_CLK_TCK); double elapsedTime = (t2-t1)/clk_ticks; double throughput = cfg->occCallCnt/elapsedTime; fprintf (stderr, "hit: %-10d (%d)\n", hit_cnt, hit_indiv_cnt); fprintf (stderr, "miss:%-10d\n", miss_cnt); fprintf (stderr, "run time: %.2f seconds\n", elapsedTime); fprintf (stderr, "occ calls: %12s\n", commaprint(cfg->occCallCnt)); fprintf (stderr, "occ/sec: %12s\n", commaprint(throughput)); } exit(eslOK); ERROR: printf ("failure allocating memory for hits\n"); exit(status); }
/* synthesize_positives()
 *
 * Builds positive test sequences from the domains on <teststack>,
 * consuming one domain per sequence if --single is set and two
 * otherwise, for as long as the stack holds enough of them.
 *
 * For each test sequence: a total length is drawn by looking up a
 * randomly chosen database sequence (redrawn until the domains fit),
 * the domain(s) are embedded at random offsets, and the remaining
 * regions are filled by set_random_segment(). A summary line is written
 * to <cfg->possummfp>, the lengths are recorded in <cfg->test_lens>,
 * and the sequence is written in FASTA format to <cfg->out_seqfp>.
 *
 * The number of sequences made is returned in <*ret_ntest>.
 * Returns <eslOK>; an allocation failure is fatal.
 */
static int
synthesize_positives(ESL_GETOPTS *go, struct cfg_s *cfg, char *testname, ESL_STACK *teststack, int *ret_ntest)
{
  int      ndomains = (esl_opt_GetBoolean(go, "--single") == TRUE) ? 1 : 2;
  int      nmade    = 0;     /* test seqs produced by this call      */
  ESL_SQ  *dom1, *dom2;      /* the one or two domains to embed      */
  ESL_SQ  *testsq;           /* the synthetic test sequence          */
  void    *ptr;              /* realloc scratch, and popped objects  */
  int64_t  L;                /* total length of synthetic test seq   */
  int      d1n, d2n;         /* lengths of two domains               */
  int      L1, L2, L3;       /* lengths of three random regions      */
  int      i, j;             /* "insert after" points for domains    */
  int      status;

  while (esl_stack_ObjectCount(teststack) >= ndomains)
    {
      struct testseq_s *rec;

      ESL_RALLOC(cfg->test_lens, ptr, (cfg->ntest+1) * sizeof(struct testseq_s));

      /* Pop our one or two test domains off the stack */
      esl_stack_PPop(teststack, &ptr);
      dom1 = ptr;
      d1n  = dom1->n;

      dom2 = NULL;
      d2n  = 0;
      if (ndomains == 2)
        {
          esl_stack_PPop(teststack, &ptr);
          dom2 = ptr;
          d2n  = dom2->n;
        }

      /* Select a random total sequence length */
      if (d1n+d2n > cfg->db_maxL)
        esl_fatal("can't construct test seq; no db seq >= %d residues\n", d1n+d2n);

      do {
        if (esl_ssi_FindNumber(cfg->dbfp->data.ascii.ssi, esl_rnd_Roll(cfg->r, cfg->db_nseq), NULL, NULL, NULL, &L, NULL) != eslOK)
          esl_fatal("failed to look up a random seq");
      } while (L < d1n+d2n);

      /* Now figure out the embedding */
      if (ndomains != 2)
        {
          /* One domain, dropped after position i (i = 0..L-d1n):
           *    1           .. i      = random region 1 (none if i == 0)
           *    i+1         .. i+d1n  = domain 1
           *    i+d1n+1     .. L      = random region 2 (none if i == L-d1n)
           */
          i  = esl_rnd_Roll(cfg->r, L - d1n + 1 );
          L1 = i;
          L2 = L - d1n - L1;
          L3 = 0;
        }
      else
        {
          /* Two domains: draw two "insert after" points i <= j in 0..L',
           * for L' = L-d1n-d2n, the total length of nonhomologous seq:
           *    1             .. i            = random region 1 (none if i == 0)
           *    i+1           .. i+d1n        = domain 1
           *    i+d1n+1       .. j+d1n        = random region 2 (none if i == j)
           *    j+d1n+1       .. j+d1n+d2n    = domain 2
           *    j+d1n+d2n+1   .. L            = random region 3 (none if j == L')
           */
          do {
            i = esl_rnd_Roll(cfg->r, L - d1n - d2n + 1 );
            j = esl_rnd_Roll(cfg->r, L - d1n - d2n + 1 );
          } while (i > j);

          L1 = i;
          L2 = j-i;
          L3 = L - d1n - d2n - j;
        }

      testsq = esl_sq_CreateDigital(cfg->abc);
      esl_sq_GrowTo(testsq, L);
      testsq->n = L;

      if (ndomains == 2)
        {
          esl_sq_FormatName(testsq, "%s/%d/%d-%d/%d-%d", testname, cfg->ntest, i+1, i+d1n, j+d1n+1, j+d1n+d2n);
          esl_sq_FormatDesc(testsq, "domains: %s %s", dom1->name, dom2->name);
        }
      else
        {
          esl_sq_FormatName(testsq, "%s/%d/%d-%d", testname, cfg->ntest, i+1, i+d1n);
          esl_sq_FormatDesc(testsq, "domain: %s", dom1->name);
        }

      fprintf(cfg->possummfp, "%-35s %5d %5d %5d %5d %5d %5d",
              testsq->name, (int) testsq->n, L1, d1n, L2, d2n, L3);

      /* Fill in the sequence, left to right */
      testsq->dsq[0] = testsq->dsq[L+1] = eslDSQ_SENTINEL;

      set_random_segment(go, cfg, cfg->possummfp, testsq->dsq+1, L1);

      memcpy(testsq->dsq+i+1, dom1->dsq+1, sizeof(ESL_DSQ) * d1n);
      fprintf(cfg->possummfp, " %-24s %5d %5d", dom1->name, 1, d1n);

      set_random_segment(go, cfg, cfg->possummfp, testsq->dsq+i+d1n+1, L2);

      if (ndomains == 2)
        {
          memcpy(testsq->dsq+j+d1n+1, dom2->dsq+1, sizeof(ESL_DSQ) * d2n);
          fprintf(cfg->possummfp, " %-24s %5d %5d", dom2->name, 1, d2n);

          set_random_segment(go, cfg, cfg->possummfp, testsq->dsq+j+d1n+d2n+1, L3);
        }
      fprintf(cfg->possummfp, "\n");

      /* Record the layout of this test sequence */
      rec      = &(cfg->test_lens[cfg->ntest]);
      rec->L   = L;
      rec->L1  = L1;
      rec->d1n = d1n;
      rec->L2  = L2;
      rec->d2n = d2n;
      rec->L3  = L3;
      cfg->ntest++;
      nmade++;

      esl_sqio_Write(cfg->out_seqfp, testsq, eslSQFILE_FASTA, FALSE);

      esl_sq_Destroy(dom1);
      if (ndomains == 2) esl_sq_Destroy(dom2);
      esl_sq_Destroy(testsq);
    }

  *ret_ntest = nmade;
  return eslOK;

 ERROR:
  esl_fatal("Failure in synthesize_positives");
  return status;
}
/* glocal_rescore_isolated_domain()
 * EPN, Tue Oct  5 10:16:12 2010
 *
 * Based on p7_domaindef.c's rescore_isolated_domain(), modified so
 * that generic matrices (which can be used for glocally configured
 * models) can be used. This function finds a single glocal domain,
 * not a single local one.
 *
 * The envelope <i>..<j> of <sq> is scored in isolation with Forward
 * in <gx1>. The Backward/posterior decoding pass and the optimal
 * accuracy (OA) alignment are optional, and are controlled by:
 *   <null2_is_done>: TRUE if the null2 scores in <ddef->n2sc> have
 *                    already been computed for this region;
 *   <do_null2>:      TRUE if a null2 penalty will eventually be
 *                    applied to this domain;
 *   <do_aln>:        TRUE if the OA alignment is needed.
 * Backward and decoding are only run if <do_null2> or <do_aln> is set.
 *
 * On success, the per-domain information is registered in the next
 * free slot of <ddef->dcl> (which is doubled in size if it is full)
 * and <ddef->ndom> is incremented. Fields that depend on later
 * calculations (dombias, bitscore, lnP, is_reported, is_included, ad)
 * are set to placeholder values for the caller to fill in.
 *
 * Working memory upon return:
 *   <ddef->tr>: if <do_aln>, it has been used for the OA trace of the
 *               domain, and p7_trace_Reuse() has been called on it.
 *   <gx1>:      holds the Forward matrix, or the OA score matrix if
 *               <do_aln>; callers should treat it as undefined.
 *   <gx2>:      holds posterior probabilities if decoding was run;
 *               callers should treat it as undefined.
 *
 * Returns:  <eslOK> on success.
 *           <eslFAIL> if p7_GDecoding() reports <eslERANGE>
 *           (rare: numeric overflow; domain is assumed to be
 *           repetitive garbage [J3/119-212]).
 *           The failure status if growing <ddef->dcl> fails.
 */
static int
glocal_rescore_isolated_domain(P7_DOMAINDEF *ddef, const P7_PROFILE *gm, const ESL_SQ *sq,
			       P7_GMX *gx1, P7_GMX *gx2, int i, int j, int null2_is_done, int do_null2, int do_aln)
{
  P7_DOMAIN *slot          = NULL;
  int        envlen        = j - i + 1;
  int        need_decoding = (do_null2 || do_aln);
  float      n2sum         = 0.0;   /* summed null2 correction, in NATS      */
  float      fwdsc;                 /* Forward score of the envelope         */
  float      oasc          = 0.;    /* stays 0 unless the OA alignment runs  */
  float      null2[p7_MAXCODE];
  int        r;                     /* residue position in <sq>, i..j        */
  int        k;                     /* index over trace positions            */
  int        status;

  p7_GForward(sq->dsq + i-1, envlen, gm, gx1, &fwdsc);

  if (need_decoding)
    {
      p7_GBackward(sq->dsq + i-1, envlen, gm, gx2, NULL);

      /* <gx2> is overwritten with posterior probabilities */
      status = p7_GDecoding(gm, gx1, gx2, gx2);
      if (status == eslERANGE) return eslFAIL;

      if (do_null2)
	{
	  /* If null2 isn't set yet for i..j, compute it now by the
	   * expectation (posterior decoding) method.
	   */
	  if (! null2_is_done)
	    {
	      p7_GNull2_ByExpectation(gm, gx2, null2);
	      for (r = i; r <= j; r++)
		ddef->n2sc[r] = logf(null2[sq->dsq[r]]);
	    }

	  for (r = i; r <= j; r++)
	    n2sum += ddef->n2sc[r];
	}

      if (do_aln)
	{
	  /* Optimal accuracy alignment; <gx1> is overwritten with OA scores */
	  p7_GOptimalAccuracy(gm, gx2, gx1, &oasc);
	  p7_GOATrace        (gm, gx2, gx1, ddef->tr);

	  /* The trace's seq coords are relative to the envelope; shift
	   * them by i-1 so that they refer to the original dsq.
	   */
	  for (k = 0; k < ddef->tr->N; k++)
	    {
	      if (ddef->tr->i[k] > 0) ddef->tr->i[k] += i-1;
	    }
	}
    }

  /* Make room for one more domain if the results array is full */
  if (ddef->ndom == ddef->nalloc)
    {
      void *tmp;

      ESL_RALLOC(ddef->dcl, tmp, sizeof(P7_DOMAIN) * (ddef->nalloc*2));
      ddef->nalloc *= 2;
    }

  /* Store the results in the next empty domain structure */
  slot = &(ddef->dcl[ddef->ndom]);

  slot->ienv          = i;
  slot->jenv          = j;
  slot->iali          = i;
  slot->jali          = j;
  slot->envsc         = fwdsc;   /* in units of NATS */
  slot->domcorrection = n2sum;   /* in units of NATS; 0. if do_null2 == FALSE */
  slot->oasc          = oasc;    /* expected # of correctly aligned residues; 0. if do_aln == FALSE */

  /* Placeholders, to be set later by the caller */
  slot->dombias       = 0.0;
  slot->bitscore      = 0.0;
  slot->lnP           = 1.0;
  slot->is_reported   = FALSE;
  slot->is_included   = FALSE;
  slot->ad            = NULL;

  ddef->ndom++;

  if (do_aln) p7_trace_Reuse(ddef->tr);
  return eslOK;

 ERROR:
  if (do_aln) p7_trace_Reuse(ddef->tr);
  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* application configuration */ char *hmmfile = NULL; /* HMM file name */ char *seqfile = NULL; /* sequence file name */ char *mapfile = NULL; /* optional mapped MSA file name */ int infmt = eslSQFILE_UNKNOWN; int outfmt = eslMSAFILE_STOCKHOLM; P7_HMMFILE *hfp = NULL; /* open HMM file */ ESL_SQFILE *sqfp = NULL; /* open sequence file */ char *outfile = NULL; /* output filename */ FILE *ofp = stdout; /* output stream */ ESL_SQ **sq = NULL; /* array of sequences */ void *p = NULL; /* tmp ptr for reallocation */ int nseq = 0; /* # of sequences in <seqfile> */ int mapseq = 0; /* # of sequences in mapped MSA */ int totseq = 0; /* # of seqs in all sources */ ESL_ALPHABET *abc = NULL; /* alphabet (set from the HMM file)*/ P7_HMM *hmm = NULL; P7_TRACE **tr = NULL; /* array of tracebacks */ ESL_MSA *msa = NULL; /* resulting multiple alignment */ int msaopts = 0; /* flags to p7_tracealign_Seqs() */ int idx; /* counter over seqs, traces */ int status; /* easel/hmmer return code */ char errbuf[eslERRBUFSIZE]; /* Parse the command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) cmdline_failure(argv[0], "Error in configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") ) cmdline_help (argv[0], go); if (esl_opt_ArgNumber(go) != 2) cmdline_failure(argv[0], "Incorrect number of command line arguments.\n"); hmmfile = esl_opt_GetArg(go, 1); seqfile = esl_opt_GetArg(go, 2); if (strcmp(hmmfile, "-") == 0 && strcmp(seqfile, "-") == 0) cmdline_failure(argv[0], "Either <hmmfile> or <seqfile> may be '-' (to read from stdin), but not both.\n"); msaopts |= p7_ALL_CONSENSUS_COLS; /* default as of 3.1 */ if (esl_opt_GetBoolean(go, "--trim")) msaopts |= p7_TRIM; /* If caller declared an input format, decode it */ if (esl_opt_IsOn(go, "--informat")) { infmt = 
esl_sqio_EncodeFormat(esl_opt_GetString(go, "--informat")); if (infmt == eslSQFILE_UNKNOWN) cmdline_failure(argv[0], "%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--informat")); } /* Determine output alignment file format */ outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat")); if (outfmt == eslMSAFILE_UNKNOWN) cmdline_failure(argv[0], "%s is not a recognized output MSA file format\n", esl_opt_GetString(go, "--outformat")); /* Open output stream */ if ( (outfile = esl_opt_GetString(go, "-o")) != NULL) { if ((ofp = fopen(outfile, "w")) == NULL) cmdline_failure(argv[0], "failed to open -o output file %s for writing\n", outfile); } /* If caller forced an alphabet on us, create the one the caller wants */ if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); /* Read one HMM, and make sure there's only one. */ status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); status = p7_hmmfile_Read(hfp, &abc, &hmm); if (status == eslEFORMAT) p7_Fail("Bad file format in HMM file %s:\n%s\n", hfp->fname, hfp->errbuf); else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type)); else if (status == eslEOF) p7_Fail("Empty HMM file %s? 
No HMM data found.\n", hfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s\n", hfp->fname); status = p7_hmmfile_Read(hfp, &abc, NULL); if (status != eslEOF) p7_Fail("HMM file %s does not contain just one HMM\n", hfp->fname); p7_hmmfile_Close(hfp); /* We're going to build up two arrays: sequences and traces. * If --mapali option is chosen, the first set of sequences/traces is from the provided alignment */ if ( (mapfile = esl_opt_GetString(go, "--mapali")) != NULL) { map_alignment(mapfile, hmm, &sq, &tr, &mapseq); } totseq = mapseq; /* Read digital sequences into an array (possibly concat'ed onto mapped seqs) */ status = esl_sqfile_OpenDigital(abc, seqfile, infmt, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n", seqfile); else if (status == eslEFORMAT) p7_Fail("Sequence file %s is empty or misformatted\n", seqfile); else if (status != eslOK) p7_Fail("Unexpected error %d opening sequence file %s\n", status, seqfile); ESL_RALLOC(sq, p, sizeof(ESL_SQ *) * (totseq + 1)); sq[totseq] = esl_sq_CreateDigital(abc); nseq = 0; while ((status = esl_sqio_Read(sqfp, sq[totseq+nseq])) == eslOK) { nseq++; ESL_RALLOC(sq, p, sizeof(ESL_SQ *) * (totseq+nseq+1)); sq[totseq+nseq] = esl_sq_CreateDigital(abc); } if (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", sqfp->filename, esl_sqfile_GetErrorBuf(sqfp)); else if (status != eslEOF) esl_fatal("Unexpected error %d reading sequence file %s", status, sqfp->filename); esl_sqfile_Close(sqfp); totseq += nseq; /* Remaining initializations, including trace array allocation */ ESL_RALLOC(tr, p, sizeof(P7_TRACE *) * totseq); for (idx = mapseq; idx < totseq; idx++) tr[idx] = p7_trace_CreateWithPP(); p7_tracealign_computeTraces(hmm, sq, mapseq, totseq - mapseq, tr); p7_tracealign_Seqs(sq, tr, totseq, hmm->M, msaopts, hmm, &msa); eslx_msafile_Write(ofp, msa, outfmt); for (idx = 0; idx <= totseq; idx++) esl_sq_Destroy(sq[idx]); /* 
including sq[nseq] because we overallocated */ for (idx = 0; idx < totseq; idx++) p7_trace_Destroy(tr[idx]); free(sq); free(tr); esl_msa_Destroy(msa); p7_hmm_Destroy(hmm); if (ofp != stdout) fclose(ofp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return eslOK; ERROR: return status; }