/* Function:  p7_coords2_hash_Reuse()
 * Synopsis:  Reuse a <P7_COORDS2_HASH>
 *
 * Purpose:   Clear a <P7_COORDS2_HASH> hash table for reuse.
 *
 *            If any allocations are overly large, drop them
 *            back to 'redline' values. Default redlines
 *            are 1024 keys (i.e. different coord pair arrays),
 *            1024 hash values, and 16384 total integers of
 *            raw data.
 *
 *            After the call, every hash bucket is empty (-1),
 *            and the key count and data usage are zero.
 *
 * Args:      ch :  hash table to reuse
 *
 * Returns:   <eslOK> on success
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            (But any reallocations here are shrinkages, so I don't
 *            believe they can fail.)
 */
int
p7_coords2_hash_Reuse(P7_COORDS2_HASH *ch)
{
  int hashsize_redline = 1024;
  int kalloc_redline   = 1024;
  int calloc_redline   = 16384;
  int i;
  int status;                   /* set by ESL_REALLOC() on failure */

  if (ch->hashsize > hashsize_redline)
    {
      ESL_REALLOC(ch->hashtable, sizeof(int32_t) * hashsize_redline);
      ch->hashsize = hashsize_redline;
    }
  if (ch->kalloc > kalloc_redline)
    {
      ESL_REALLOC(ch->nxt,        sizeof(int32_t) * kalloc_redline);
      ESL_REALLOC(ch->key_offset, sizeof(int32_t) * kalloc_redline);
      ch->kalloc = kalloc_redline;
    }
  if (ch->calloc > calloc_redline)
    {
      /* Shrink to the redline itself. Reallocating to the current
       * <ch->calloc> would release nothing while the bookkeeping
       * below claims the smaller size.
       */
      ESL_REALLOC(ch->cmem, sizeof(int32_t) * calloc_redline);
      ch->calloc = calloc_redline;
    }

  /* -1 marks an empty bucket */
  for (i = 0; i < ch->hashsize; i++)
    ch->hashtable[i] = -1;
  ch->nkeys = 0;
  ch->cn    = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  esl_keyhash_Store()
 * Synopsis:  Store a key and get a key index for it.
 *
 * Purpose:   Store a string <key> of length <n> in the key index
 *            hash table <kh>, and associate it with a unique key
 *            index, counting from 0. That index is what lets
 *            callers map hashed keys onto integer-indexed C arrays.
 *            Optionally return the index through <opt_index>.
 *
 *            <key>, <n> follow the standard idiom for strings and
 *            unterminated buffers: pass <n = -1> for a
 *            NUL-terminated string.
 *
 * Returns:   <eslOK> on success; <key> is stored in <kh>, and
 *            <*opt_index> (if requested) is the newly assigned index.
 *
 *            <eslEDUP> if <key> was already stored in the table;
 *            <*opt_index> (if requested) is the existing index.
 *
 * Throws:    <eslEMEM> on allocation failure, and sets <*opt_index>
 *            (if requested) to -1.
 */
int
esl_keyhash_Store(ESL_KEYHASH *kh, const char *key, esl_pos_t n, int *opt_index)
{
  /* The hash is computed from <n> as passed in, before any -1 is resolved. */
  uint32_t bucket = jenkins_hash(key, n, kh->hashsize);
  int      idx;
  int      status;

  if (n == -1) n = strlen(key);

  /* Walk this bucket's chain: is the key already stored? */
  idx = kh->hashtable[bucket];
  while (idx != -1)
    {
      if (esl_memstrcmp(key, n, kh->smem + kh->key_offset[idx]))
        {
          if (opt_index) *opt_index = idx;
          return eslEDUP;
        }
      idx = kh->nxt[idx];
    }

  /* Per-key arrays full? Double them. */
  if (kh->nkeys == kh->kalloc)
    {
      ESL_REALLOC(kh->key_offset, sizeof(int)*kh->kalloc*2);
      ESL_REALLOC(kh->nxt,        sizeof(int)*kh->kalloc*2);
      kh->kalloc *= 2;
    }

  /* Key string pool too small for <n> chars plus a terminator? Keep doubling. */
  while (kh->sn + n + 1 > kh->salloc)
    {
      ESL_REALLOC(kh->smem, sizeof(char) * kh->salloc * 2);
      kh->salloc *= 2;
    }

  /* The new key takes the next free index, at the current end of the pool. */
  idx                 = kh->nkeys;
  kh->key_offset[idx] = kh->sn;
  esl_memstrcpy(key, n, kh->smem + kh->key_offset[idx]);
  kh->sn             += n+1;
  kh->nkeys++;

  /* Push the new element onto the head of its bucket's chain. */
  kh->nxt[idx]          = kh->hashtable[bucket];
  kh->hashtable[bucket] = idx;

  /* At more than 3x saturation, expand the hash table. */
  if (kh->nkeys > 3*kh->hashsize)
    {
      status = key_upsize(kh);
      if (status != eslOK) goto ERROR;
    }

  if (opt_index) *opt_index = idx;
  return eslOK;

 ERROR:
  if (opt_index) *opt_index = -1;
  return status;
}
/* p7_coords2_hash_upsize()
 *
 * Grow the hash table in <ch> by 8x because it is getting too full.
 * Changing the table size changes every key's hash value, so all
 * stored keys are re-hashed and re-chained. The key data itself
 * stays where it is in the data storage array.
 *
 * Once the table holds 2^28 or more buckets it is not grown further,
 * and the call is a no-op that still returns <eslOK>.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_coords2_hash_upsize(P7_COORDS2_HASH *ch)
{
  uint32_t bucket;
  int32_t  k;
  int      status;              /* written by ESL_REALLOC() on failure */

  /* 28, because one more 8x (2^3) step must stay below 2^31 */
  if (ch->hashsize >= (1<<28)) return eslOK;   /* quasi-success: don't grow any more */

  ESL_REALLOC(ch->hashtable, sizeof(int32_t) * (ch->hashsize << 3));
  ch->hashsize <<= 3;                          /* x8 */

  /* Empty every bucket ... */
  k = 0;
  while (k < ch->hashsize)
    ch->hashtable[k++] = -1;

  /* ... then push each stored key onto the head of its new chain. */
  for (k = 0; k < ch->nkeys; k++)
    {
      bucket                = p7_coords2_hash_function_alt(ch->cmem + ch->key_offset[k], ch->hashsize);
      ch->nxt[k]            = ch->hashtable[bucket];
      ch->hashtable[bucket] = k;
    }
  return eslOK;

 ERROR:
  return eslEMEM;
}
/* Function:  p7_filtermx_GrowTo_avx()
 * Synopsis:  Resize filter DP matrix for new profile size (AVX2 build).
 *
 * Purpose:   Given an existing filter matrix structure <fx> and the
 *            dimension <allocM> of a new profile (in consensus
 *            positions), make sure the AVX part of <fx> is large
 *            enough for such a profile; reallocate if it is not.
 *
 *            When compiled without <HAVE_AVX2>, no work is done.
 *
 * Returns:   <eslOK> on success.
 *            <eslENORESULT> if compiled without <HAVE_AVX2>.
 *
 * Throws:    <eslEMEM> on allocation failure. The state of
 *            <fx> is now undefined, and it should not be used.
 */
int
p7_filtermx_GrowTo_avx(P7_FILTERMX *fx, int allocM)
{
  int status;

  /* Contract checks / argument validation */
  ESL_DASSERT1( (allocM >= 1 && allocM <= 100000) );

#ifdef HAVE_AVX2
  if (fx->allocM_AVX < allocM)
    {
      /* Over-allocate by (p7_VALIGN_AVX-1) bytes so an aligned
       * pointer can be carved out of the raw block below.
       */
      ESL_REALLOC(fx->dp_mem_AVX, (sizeof(__m256i) * (p7F_NSCELLS * P7_NVW_AVX(allocM))) + (p7_VALIGN_AVX-1));
      fx->allocM_AVX = allocM;

      /* NOTE(review): the cast assumes a pointer fits in unsigned long;
       * that is not true on LLP64 targets -- confirm supported platforms.
       */
      fx->dp_AVX = (__m256i *) ( (unsigned long int) ( (char *) fx->dp_mem_AVX + (p7_VALIGN_AVX-1)) & p7_VALIMASK_AVX);
    }
  return eslOK;

 ERROR:
  return status;
#else
  return eslENORESULT;
#endif
}
/* Function:  p7_anchors_Resize()
 * Synopsis:  Reallocate a P7_ANCHORS object, if necessary
 *
 * Purpose:   Make sure that <anch> can hold an array of
 *            at least <D> anchors. The array is sized for <D+2>
 *            elements (presumably D anchors plus two sentinels --
 *            confirm against the P7_ANCHORS definition).
 *
 *            Data already stored in <anch> are not altered, so it
 *            is safe to resize an anchor array that is being grown
 *            incrementally (as in the segmental divide and conquer
 *            MPAS algorithm).
 *
 *            D=0 is a valid argument and may occur in normal use; it
 *            results in a no-op, because the structure is always big
 *            enough to hold zero anchors.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *
 * Xref:      First example of a new pattern for how we
 *            can handle reallocation/reuse strategy,
 *            replacing _Reinit() and _Grow() interfaces.
 *            [SRE:J14/1]
 */
int
p7_anchors_Resize(P7_ANCHORS *anch, int D)
{
  int nalloc;
  int status;

  /* Contract checks, argument validation */
  ESL_DASSERT1(( anch->nalloc > 0 ));

  /* Already big enough: nothing to do. */
  if (D+2 <= anch->nalloc) return eslOK;

  if (D+2 >= anch->nredline && anch->D <= 0)
    {
      /* Over the redline AND the object looks empty: allocate exactly.
       * The result is probably not a power-of-two multiple, but the
       * next _Reuse() call pulls the allocation back to the redline.
       */
      nalloc = D+2;
    }
  else
    {
      /* Under the redline, or the array looks like it is being built
       * incrementally: grow by doubling, to minimize the need for
       * more reallocations soon.
       */
      for (nalloc = anch->nalloc; nalloc < D+2; nalloc *= 2)
        ;
    }

  ESL_REALLOC(anch->a, sizeof(P7_ANCHOR) * nalloc);
  anch->nalloc = nalloc;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_hmm_mpi_Recv()
 * Synopsis:  Receives an HMM as a work unit from an MPI sender.
 *
 * Purpose:   Receive a work unit that consists of a single HMM
 *            sent by MPI <source> (<0..nproc-1>, or
 *            <MPI_ANY_SOURCE>) tagged as <tag> for MPI communicator <comm>.
 *
 *            Work units are prefixed by a status code that gives the
 *            number of HMMs to follow; here, 0 or 1. If we receive a
 *            1 code and successfully unpack an HMM, this routine
 *            returns <eslOK> and a non-<NULL> <*ret_hmm>. If we
 *            receive a 0 code (a shutdown signal), this routine
 *            returns <eslEOD> and <*ret_hmm> is <NULL>.
 *
 *            Caller provides a working buffer <*buf> of size
 *            <*nalloc> characters. These are passed by reference, so
 *            that <*buf> can be reallocated and <*nalloc> increased
 *            if necessary. As a special case, if <*buf> is <NULL> and
 *            <*nalloc> is 0, the buffer will be allocated
 *            appropriately, but the caller is still responsible for
 *            free'ing it.
 *
 *            Caller may or may not already know what alphabet the HMM
 *            is expected to be in. A reference to the current
 *            alphabet is passed in <byp_abc>, and is handed on
 *            unchanged to <p7_hmm_mpi_Unpack()>. If the alphabet is
 *            unknown, pass <*byp_abc = NULL>; if it is already known,
 *            <*byp_abc> is that alphabet.
 *
 * Args:      source  - index of MPI sender, 0..nproc-1 (0=master), or MPI_ANY_SOURCE
 *            tag     - MPI message tag; MPI_ANY_TAG, or a specific message tag
 *            comm    - MPI communicator; MPI_COMM_WORLD, or a specific MPI communicator
 *            buf     - working buffer (for receiving packed message);
 *                      if <*buf> == NULL, a <*buf> is allocated and returned;
 *                      if <*buf> != NULL, it is used (and may be reallocated)
 *            nalloc  - allocation size of <*buf> in bytes; pass 0 if <*buf==NULL>.
 *            byp_abc - BYPASS: <*byp_abc> == ESL_ALPHABET *> if known;
 *                              <*byp_abc> == NULL> if alphabet unknown.
 *            ret_hmm - RETURN: newly allocated/received profile
 *
 * Returns:   <eslOK> on success. <*ret_hmm> contains the received HMM;
 *            it is allocated here, and the caller is responsible for
 *            free'ing it. <*buf> may have been reallocated to a
 *            larger size, and <*nalloc> may have been increased.
 *
 *            Returns <eslEOD> if an end-of-data signal was received.
 *            <*ret_hmm> is <NULL>. Because the whole message is
 *            received before its status code is examined, <*buf>
 *            and <*nalloc> may have been changed in this case too.
 *
 *            Any other non-OK code returned by <p7_hmm_mpi_Unpack()>
 *            is passed through to the caller.
 *
 * Throws:    <eslEMEM> on allocation error, and <eslESYS> on MPI communication
 *            errors; in either case <*ret_hmm> is <NULL>.
 */
int
p7_hmm_mpi_Recv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **byp_abc, P7_HMM **ret_hmm)
{
  int         pos = 0;
  int         code;
  int         n;
  MPI_Status  mpistatus;
  int         status;

  /* ESL_EXCEPTION() returns immediately, bypassing ERROR:. Setting the
   * result to NULL up front keeps the documented "<*ret_hmm> is NULL on
   * any thrown error" guarantee on those paths.
   */
  *ret_hmm = NULL;

  /* Probe first, because we need to know if our buffer is big enough. */
  if ( MPI_Probe(source, tag, comm, &mpistatus)  != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi probe failed");
  if ( MPI_Get_count(&mpistatus, MPI_PACKED, &n) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi get count failed");

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc)
    {
      ESL_REALLOC(*buf, sizeof(char) * n);
      *nalloc = n;
    }

  /* Receive the entire packed work unit */
  if (MPI_Recv(*buf, n, MPI_PACKED, source, tag, comm, &mpistatus) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi recv failed");

  /* Unpack the status code prefix */
  if (MPI_Unpack(*buf, n, &pos, &code, 1, MPI_INT, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi unpack failed");

  if      (code == 0) { status = eslEOD; *ret_hmm = NULL; }
  else if (code == 1)   status = p7_hmm_mpi_Unpack(*buf, *nalloc, &pos, comm, byp_abc, ret_hmm);
  else                  ESL_EXCEPTION(eslESYS, "bad mpi buffer transmission code");
  return status;

 ERROR: /* from ESL_REALLOC only */
  *ret_hmm = NULL;
  return status;
}
/* Function:  p7_filtermx_GrowTo_neon64()
 * Synopsis:  Resize filter DP matrix for new profile size (NEON64 build).
 *
 * Purpose:   Given an existing filter matrix structure <fx> and the
 *            dimension <allocM> of a new profile (in consensus
 *            positions), make sure <fx> is large enough for such a
 *            profile; reallocate if it is not.
 *
 *            When compiled without <HAVE_NEON64>, no work is done.
 *
 * Returns:   <eslOK> on success.
 *            <eslENORESULT> if compiled without <HAVE_NEON64>.
 *
 * Throws:    <eslEMEM> on allocation failure. The state of
 *            <fx> is now undefined, and it should not be used.
 */
int
p7_filtermx_GrowTo_neon64(P7_FILTERMX *fx, int allocM)
{
#ifndef HAVE_NEON64
  return eslENORESULT;
#else
  int status;

  /* Contract checks / argument validation */
  ESL_DASSERT1( (allocM >= 1 && allocM <= 100000) );

  if (fx->allocM < allocM)
    {
      /* The extra (p7_VALIGN-1) bytes leave room to align <dp> by hand. */
      ESL_REALLOC(fx->dp_mem, (sizeof(esl_neon_128i_t) * (p7F_NSCELLS * P7_NVW(allocM))) + (p7_VALIGN-1));
      fx->allocM = allocM;

      /* NOTE(review): the cast assumes a pointer fits in unsigned long -- confirm. */
      fx->dp = (esl_neon_128i_t *) ( (unsigned long int) ( (char *) fx->dp_mem + (p7_VALIGN-1)) & p7_VALIMASK);
    }
  return eslOK;

 ERROR:
  return status;
#endif
}
/* Function:  p7_hmm_mpi_Send()
 * Synopsis:  Send an HMM as an MPI work unit.
 *
 * Purpose:   Sends an HMM <hmm> as a work unit to MPI process
 *            <dest> (where <dest> ranges from 0..<nproc-1>), tagged
 *            with MPI tag <tag>, for MPI communicator <comm>, as
 *            the sole workunit or result.
 *
 *            Work units are prefixed by a status code indicating the
 *            number of HMMs sent. If <hmm> is <NULL>, this code is 0,
 *            and <_Recv()> interprets such a unit as an EOD
 *            (end-of-data) signal, a signal to cleanly shut down
 *            worker processes.
 *
 *            To minimize alloc/free cycles, caller passes a pointer
 *            to a working buffer <*buf> of size <*nalloc> characters.
 *            If the message does not fit, <*buf> is reallocated and
 *            <*nalloc> increased to the new size. As a special case,
 *            if <*buf> is <NULL> and <*nalloc> is 0, the buffer is
 *            allocated here, but the caller is still responsible for
 *            free'ing it.
 *
 * Returns:   <eslOK> on success; <*buf> may have been reallocated and
 *            <*nalloc> may have been increased.
 *
 * Throws:    <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc
 *            fails. In either case, <*buf> and <*nalloc> remain valid and useful
 *            memory (though the contents of <*buf> are undefined).
 *            Non-OK codes from <p7_hmm_mpi_PackSize()> and
 *            <p7_hmm_mpi_Pack()> are passed through.
 *
 * Note:      NOTE(review): <p7_hmm_mpi_PackSize()> is called even when
 *            <hmm> is <NULL>; confirm that it tolerates a NULL HMM.
 */
int
p7_hmm_mpi_Send(const P7_HMM *hmm, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc)
{
  int n;                        /* total packed message size, bytes */
  int code;                     /* # of HMMs in the message: 1 or 0 */
  int sz, pos;
  int status;

  /* Size of the message: a status code is always sent (0=EOD=nothing sent) ... */
  if ( MPI_Pack_size(1, MPI_INT, comm, &sz) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack size failed");
  n = sz;

  /* ... followed by the packed HMM. */
  status = p7_hmm_mpi_PackSize(hmm, comm, &sz);
  if (status != eslOK) return status;
  n += sz;

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc)
    {
      ESL_REALLOC(*buf, sizeof(char) * n);
      *nalloc = n;
    }

  /* Pack the status code, then the HMM (if any), into the buffer */
  code = (hmm ? 1 : 0);
  pos  = 0;
  if (MPI_Pack(&code, 1, MPI_INT, *buf, n, &pos, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack failed");
  if (hmm)
    {
      status = p7_hmm_mpi_Pack(hmm, *buf, n, &pos, comm);
      if (status != eslOK) return status;
    }

  /* Send the packed HMM to the destination. */
  if (MPI_Send(*buf, n, MPI_PACKED, dest, tag, comm) != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi send failed");
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_masstrace_GrowTo()
 * Synopsis:  Make sure a P7_MASSTRACE is large enough for <M>,<L>.
 *
 * Purpose:   Grow the arrays in <mt> as needed: <kmass> to at least
 *            <M+2> floats, and <imass> to at least <L+2> floats.
 *            <imass> is only touched if it is already non-NULL.
 *            Arrays are never shrunk.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_masstrace_GrowTo(P7_MASSTRACE *mt, int M, int L)
{
  int status;

  if (mt->imass != NULL && L+2 > mt->ialloc)
    {
      ESL_REALLOC(mt->imass, sizeof(float) * (L+2));
      mt->ialloc = L+2;
    }

  if (M+2 > mt->kalloc)
    {
      ESL_REALLOC(mt->kmass, sizeof(float) * (M+2));
      mt->kalloc = M+2;
    }

  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_sparsemask_Reinit_avx512()
 * Synopsis:  Reinitialize an existing P7_SPARSEMASK for a new comparison (AVX-512).
 *
 * Purpose:   Same as a <_Create()>, but reusing an existing
 *            <P7_SPARSEMASK> to minimize reallocation calls.
 *            Sets the mask up for a model of length <M> and a
 *            sequence of length <L>. The per-row arrays <k[]> and
 *            <n[]> grow if they have fewer than <L+1> rows;
 *            <seg[]> and <kmem> keep their previous allocations.
 *
 *            The realloc counters are NOT reset. They keep
 *            accumulating during the life of the object.
 *
 * Returns:   <eslOK> on success.
 *            <eslENORESULT> if compiled without <HAVE_AVX512>.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_sparsemask_Reinit_avx512(P7_SPARSEMASK *sm, int M, int L)
{
#ifndef HAVE_AVX512
  return eslENORESULT;
#else
  int row, lane;
  int status;

  sm->L         = L;
  sm->M         = M;
  sm->Q_AVX_512 = P7_NVF_AVX_512(M);

  /* Do the row arrays k[] and n[] need to grow? */
  if (sm->ralloc_AVX_512 < L+1)
    {
      ESL_REALLOC(sm->k_AVX_512, sizeof(int *) * (L+1));
      ESL_REALLOC(sm->n_AVX_512, sizeof(int)   * (L+1));
      sm->ralloc_AVX_512 = L+1;
      sm->n_rrealloc++;
    }

  sm->S_AVX_512      = 0;
  sm->nrow_AVX_512   = 0;
  sm->ncells_AVX_512 = 0;

  /* sn[] are initialized for each sparse row by _StartRow() */
  sm->last_i_AVX_512 = sm->L+1;
  lane = 0;
  while (lane < p7_VNF_AVX_512)
    sm->last_k_AVX_512[lane++] = -1;

  /* n[0] will always be 0, but reinit n[1..L] */
  for (row = 1; row <= L; row++)
    sm->n_AVX_512[row] = 0;

  return eslOK;

 ERROR:
  return status;
#endif
}
/* Function:  p7_gmxb_Reinit()
 * Synopsis:  Reinitialize a banded matrix for a new set of bands.
 *
 * Purpose:   Make sure <gxb> has room for the <bnd->ncell> banded
 *            cells and <bnd->nrow> rows described by <bnd>, growing
 *            <gxb->dp> and <gxb->xmx> if necessary, then attach
 *            <bnd> to <gxb>. Only a pointer to <bnd> is stored; it
 *            is not copied.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_gmxb_Reinit(P7_GMXB *gxb, P7_GBANDS *bnd)
{
  int status;

  /* main cells: p7G_NSCELLS floats per banded cell */
  if (gxb->dalloc < bnd->ncell)
    {
      ESL_REALLOC(gxb->dp, sizeof(float) * bnd->ncell * p7G_NSCELLS);
      gxb->dalloc = bnd->ncell;
    }

  /* special cells: p7G_NXCELLS floats per row */
  if (gxb->xalloc < bnd->nrow)
    {
      ESL_REALLOC(gxb->xmx, sizeof(float) * bnd->nrow * p7G_NXCELLS);
      gxb->xalloc = bnd->nrow;
    }

  gxb->bnd = bnd;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_gbands_GrowRows()
 * Synopsis:  Double the row allocation of a P7_GBANDS.
 *
 * Purpose:   Reallocate <bnd->kmem> to hold twice the current
 *            <bnd->rowalloc> rows, at <p7_GBANDS_NK> ints per row.
 *            <bnd->rowalloc> is only updated once the reallocation
 *            has succeeded.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_gbands_GrowRows(P7_GBANDS *bnd)
{
  int doubled = bnd->rowalloc * 2;    /* grow by doubling */
  int status;

  ESL_REALLOC(bnd->kmem, sizeof(int) * doubled * p7_GBANDS_NK);
  bnd->rowalloc = doubled;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_gbands_GrowSegs()
 * Synopsis:  Double the segment allocation of a P7_GBANDS.
 *
 * Purpose:   Reallocate <bnd->imem> to hold twice the current
 *            <bnd->segalloc> segments, at two ints per segment.
 *            <bnd->segalloc> is only updated once the reallocation
 *            has succeeded.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_gbands_GrowSegs(P7_GBANDS *bnd)
{
  int doubled = bnd->segalloc * 2;    /* grow by doubling */
  int status;

  ESL_REALLOC(bnd->imem, sizeof(int) * doubled * 2);
  bnd->segalloc = doubled;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_coords2_GrowTo()
 * Synopsis:  Make sure a P7_COORDS2 can hold a given number of pairs.
 *
 * Purpose:   If <c2> is allocated for fewer than <nalloc> coord
 *            pairs, reallocate it to hold exactly <nalloc>.
 *            Otherwise do nothing; the array is never shrunk here.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_coords2_GrowTo(P7_COORDS2 *c2, int32_t nalloc)
{
  int status;

  if (nalloc > c2->nalloc)
    {
      ESL_REALLOC(c2->arr, sizeof(P7_COORD2) * nalloc);
      c2->nalloc = nalloc;
    }
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_coords2_Grow()
 * Synopsis:  Increase allocation for coord pairs, if needed.
 *
 * Purpose:   Check whether <c2> has room for one more coord pair
 *            (that is, whether <c2->n> is still below
 *            <c2->nalloc>). If it does not, double the allocation.
 *
 * Args:      c2  : coord pair array
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_coords2_Grow(P7_COORDS2 *c2)
{
  int status;

  if (c2->n >= c2->nalloc)
    {
      ESL_REALLOC(c2->arr, sizeof(P7_COORD2) * c2->nalloc * 2);
      c2->nalloc *= 2;
    }
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_anchorhash_Reuse()
 * Synopsis:  Reuse a <P7_ANCHORHASH>
 *
 * Purpose:   Clear a <P7_ANCHORHASH> hash table for reuse.
 *
 *            If any allocations are overly large, drop them
 *            back to 'redline' values. Default redlines
 *            are 1024 keys (i.e. different anchor sets),
 *            1024 hash values, and 16384 total integers of
 *            raw data.
 *
 *            After the call, every hash bucket is empty (-1), and
 *            <L>, <M>, the key count and the data usage are zero.
 *
 * Args:      ah :  hash table to reuse
 *
 * Returns:   <eslOK> on success
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            (But any reallocations here are shrinkages, so I don't
 *            believe they can fail.)
 */
int
p7_anchorhash_Reuse(P7_ANCHORHASH *ah)
{
  int hashsize_redline = 1024;
  int kalloc_redline   = 1024;
  int aalloc_redline   = 16384;
  int i;
  int status;                   /* set by ESL_REALLOC() on failure */

  if (ah->hashsize > hashsize_redline)
    {
      ESL_REALLOC(ah->hashtable, sizeof(int32_t) * hashsize_redline);
      ah->hashsize = hashsize_redline;
    }
  if (ah->kalloc > kalloc_redline)
    {
      ESL_REALLOC(ah->nxt,        sizeof(int32_t) * kalloc_redline);
      ESL_REALLOC(ah->key_offset, sizeof(int32_t) * kalloc_redline);
      ESL_REALLOC(ah->key_count,  sizeof(int32_t) * kalloc_redline);
      ah->kalloc = kalloc_redline;
    }
  if (ah->aalloc > aalloc_redline)
    {
      /* Shrink to the redline itself. Reallocating to the current
       * <ah->aalloc> would release nothing while the bookkeeping
       * below claims the smaller size.
       */
      ESL_REALLOC(ah->amem, sizeof(int32_t) * aalloc_redline);
      ah->aalloc = aalloc_redline;
    }

  /* -1 marks an empty bucket */
  for (i = 0; i < ah->hashsize; i++)
    ah->hashtable[i] = -1;
  ah->L     = 0;
  ah->M     = 0;
  ah->nkeys = 0;
  ah->an    = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_coords2_Reuse()
 * Synopsis:  Reuse a P7_COORDS2.
 *
 * Purpose:   Empty <c2> so it can be reused. If its allocation has
 *            grown beyond <c2->nredline>, shrink it back to that
 *            redline first.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on reallocation failure.
 */
int
p7_coords2_Reuse(P7_COORDS2 *c2)
{
  int status;

  if (c2->nredline < c2->nalloc)
    {
      ESL_REALLOC(c2->arr, sizeof(P7_COORD2) * c2->nredline);
      c2->nalloc = c2->nredline;
    }

  c2->n = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_anchors_Reuse()
 * Synopsis:  Reuse a P7_ANCHORS.
 *
 * Purpose:   Reset <anch> to hold zero anchors (<D=0>). If its
 *            allocation has grown beyond <anch->nredline>, shrink
 *            it back to that redline first.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on reallocation failure.
 */
int
p7_anchors_Reuse(P7_ANCHORS *anch)
{
  int status;

  if (anch->nredline < anch->nalloc)
    {
      ESL_REALLOC(anch->a, sizeof(P7_ANCHOR) * anch->nredline);
      anch->nalloc = anch->nredline;
    }

  anch->D = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_sparsemask_StartRow_avx512()
 * Synopsis:  Prepare to store sparse cells on a new row <i> (AVX-512).
 *
 * Purpose:   Set up <sm> for collecting sparse cells on row <i>.
 *            Makes sure <kmem> has room for one more full row of
 *            <p7_VNF_AVX_512 * Q> cells (doubling it if not),
 *            points the per-lane slots <s[]> into <kmem>, zeroes
 *            the per-lane counts <sn[]>, and records <i> as the
 *            last row started.
 *
 *            Rows are expected to be added in reverse order L..1;
 *            in <p7_DEBUGGING> builds this is checked.
 *
 * Returns:   <eslOK> on success.
 *            <eslENORESULT> if compiled without <HAVE_AVX512>.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEINVAL> (debugging builds only) if <i> is out of
 *            range or rows are added out of order.
 */
int
p7_sparsemask_StartRow_avx512(P7_SPARSEMASK *sm, int i)
{
#ifdef HAVE_AVX512
  int r;
  int status;

#ifdef p7_DEBUGGING
  if (i < 1 || i > sm->L)      ESL_EXCEPTION(eslEINVAL, "i is 1..L: sequence position");
  /* Compare against the AVX-512 row tracker: that is the field this
   * function updates below, so it is the one holding the previous row.
   */
  if (sm->last_i_AVX_512 <= i) ESL_EXCEPTION(eslEINVAL, "rows need to be added in reverse order L..1");
#endif

  /* Make sure kmem has enough memory; if not, double it.
   * Because we know the original allocation was enough to hold
   * the slots, we know that doubling (even if ncells has filled
   * the current kalloc) is sufficient.
   */
  if (sm->ncells_AVX_512 + p7_VNF_AVX_512*sm->Q_AVX_512 > sm->kalloc_AVX_512)
    {
      int64_t kalloc_req = sm->kalloc_AVX_512 * 2;
      ESL_REALLOC(sm->kmem_AVX_512, sizeof(int) * kalloc_req);
      sm->kalloc_AVX_512 = kalloc_req;
      sm->n_krealloc++;
    }

  /* Slot pointers are assigned in reverse lane order; counts start at 0. */
  for (r = 0; r < p7_VNF_AVX_512; r++)
    {
      sm->s_AVX_512[p7_VNF_AVX_512-r-1] = sm->kmem_AVX_512 + sm->ncells_AVX_512 + r*sm->Q_AVX_512;
      sm->sn_AVX_512[r]                 = 0;
    }
  sm->last_i_AVX_512 = i;
  for (r = 0; r < p7_VNF_AVX_512; r++)
    sm->last_k_AVX_512[r] = sm->M+1;   /* sentinel to be sure that Add() is called in reverse order M..1 */
  return eslOK;

 ERROR:
  return status;
#endif //HAVE_AVX512
#ifndef HAVE_AVX512
  return eslENORESULT;
#endif
}
/* Function:  p7_hit_Grow()
 * Synopsis:  Change the allocation of a P7_HIT array.
 *
 * Purpose:   Given a ptr <*hitp> to a <P7_HIT> array, the old
 *            allocation size <oldalloc>, and a new allocation size
 *            <newalloc>, reallocate the array <*hitp>. Elements
 *            <oldalloc..newalloc-1> get their <name>, <acc>,
 *            <desc> and <dcl> pointers set to NULL and <ndom> set
 *            to 0; other fields of those elements are left untouched.
 *
 * Returns:   <eslOK> on success; <*hitp> may have moved.
 *
 * Throws:    <eslEMEM> on reallocation failure. <*hitp> is
 *            unchanged, as is the array's contents.
 */
int
p7_hit_Grow(P7_HIT **hitp, int oldalloc, int newalloc)
{
  int h;
  int status;

  ESL_REALLOC( (*hitp), sizeof(P7_HIT) * newalloc);

  /* Initialize only the newly added tail of the array. */
  h = oldalloc;
  while (h < newalloc)
    {
      P7_HIT *hit = &((*hitp)[h]);

      hit->name = NULL;
      hit->acc  = NULL;
      hit->desc = NULL;
      hit->dcl  = NULL;
      hit->ndom = 0;
      h++;
    }
  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_coords2_Sample()
 * Synopsis:  Sample random domain segment positions.
 *
 * Purpose:   Sample 1..<maxseg> random segments on a sequence of
 *            length <L>, as start/end pairs that are sorted and
 *            nonoverlapping, and store them in <c2>.
 *
 *            <byp_wrk> is a workspace for <L> integers, following
 *            the Easel bypass idiom: pass <NULL> to have a
 *            workspace allocated and freed internally; pass a
 *            pointer to a <NULL> pointer to get the allocated
 *            workspace back; or pass a pointer to an existing
 *            workspace, which is reallocated for <L> integers and
 *            handed back.
 *
 *            NOTE(review): the first <2*nseg> entries of the
 *            workspace are sorted and read, so this appears to
 *            require <2*maxseg <= L>; that is not checked here --
 *            confirm against callers.
 *
 * Args:      rng     : random number generator
 *            c2      : coord pair array to fill
 *            maxseg  : maximum number of segments to sample
 *            L       : sequence length; coords are 1..L
 *            byp_wrk : BYPASS: workspace (see above)
 *
 * Returns:   <eslOK> on success. <c2->n> is the number of segments
 *            sampled, and <c2->arr[0..n-1]> hold their coords.
 *
 * Throws:    <eslEMEM> on allocation failure, including failure to
 *            grow <c2>. An internally allocated workspace is freed.
 *            A workspace that was provided by the caller is still
 *            owned by the caller, and <*byp_wrk> points to it.
 */
int
p7_coords2_Sample(ESL_RANDOMNESS *rng, P7_COORDS2 *c2, int32_t maxseg, int32_t L, int32_t **byp_wrk)
{
  int32_t *wrk  = NULL;
  int32_t  nseg = 1 + esl_rnd_Roll(rng, maxseg); /* 1..maxseg */
  int32_t  i;
  int      status;

  /* Using the bypass idiom, make sure we have a workspace for <L> coords */
  if      (esl_byp_IsInternal(byp_wrk) ) ESL_ALLOC(wrk, sizeof(int32_t) * L);
  else if (esl_byp_IsReturned(byp_wrk) ) ESL_ALLOC(wrk, sizeof(int32_t) * L);
  else if (esl_byp_IsProvided(byp_wrk) ) { wrk = *byp_wrk; ESL_REALLOC(wrk, sizeof(int32_t) * L); }

  /* We put the numbers 1..L into the workspace <wrk>; shuffle them;
   * then sort the top nseg*2 of them. This gives us <nseg>
   * nonoverlapping start/end coords, in order.
   */
  for (i = 0; i < L; i++) wrk[i] = i+1;
  esl_vec_IShuffle(rng, wrk, L);
  esl_vec_ISortIncreasing(wrk, nseg*2);

  /* Store those randomized coords now in the data structure.
   * The grow can fail; writing into <c2->arr> without checking
   * would then run past the end of the old allocation.
   */
  if ((status = p7_coords2_GrowTo(c2, nseg)) != eslOK) goto ERROR;
  c2->n = nseg;
  for (i = 0; i < nseg; i++)
    {
      c2->arr[i].n1 = wrk[i*2];
      c2->arr[i].n2 = wrk[i*2+1];
    }

  /* Using the bypass idiom, recycle workspace, if we're supposed to */
  if      (esl_byp_IsInternal(byp_wrk)) free(wrk);
  else if (esl_byp_IsReturned(byp_wrk)) *byp_wrk = wrk;
  else if (esl_byp_IsProvided(byp_wrk)) *byp_wrk = wrk;
  return eslOK;

 ERROR:
  if (esl_byp_IsProvided(byp_wrk))
    {
      /* The caller's workspace may have been moved by a successful
       * realloc before a later failure; hand the live pointer back.
       */
      if (wrk) *byp_wrk = wrk;
    }
  else
    {
      /* Internal or to-be-returned workspace: nobody else owns it yet.
       * In the to-be-returned case <*byp_wrk> is still NULL.
       */
      free(wrk);
    }
  return status;
}
/* Function:  allocateSeqdata()
 * Synopsis:  ensure that space is allocated for the seqdata object
 *            in the FM-index metadata.
 *
 * Purpose:   Make sure <meta->seq_data> has a slot for sequence
 *            number <numseqs>, then allocate that slot's <name>,
 *            <acc>, <source> and <desc> buffers, each sized to hold
 *            the corresponding string of <sq> plus a terminator, and
 *            record the string lengths. The strings themselves are
 *            not copied here.
 *
 *            When <numseqs> has reached <*allocedseqs>, the array is
 *            grown 4x (or to 4 slots if <*allocedseqs> was 0), and
 *            <*allocedseqs> is updated once the reallocation has
 *            succeeded.
 *
 * Args:      meta        : FM-index metadata holding <seq_data>
 *            sq          : sequence whose name/acc/source/desc are sized
 *            numseqs     : index of the slot to prepare
 *            allocedseqs : current allocation of <seq_data>, in slots;
 *                          updated on growth
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
allocateSeqdata (FM_METADATA *meta, ESL_SQ *sq, int numseqs, int *allocedseqs)
{
  int length;
  int status = eslOK;

  if (numseqs == *allocedseqs) { // either first allocation, or increase in size
    /* Grow 4x. Starting from 0, 0*4 would stay 0 and the write to
     * seq_data[numseqs] below would be out of bounds, so seed with 4.
     * The caller's count is only updated after the realloc succeeds.
     */
    int new_alloc = (*allocedseqs > 0) ? *allocedseqs * 4 : 4;

    ESL_REALLOC (meta->seq_data, new_alloc * sizeof(FM_SEQDATA));
    *allocedseqs = new_alloc;
  }

  /* Allocate space for the name, acc, source, and desc of the sequence.
   * ESL_ALLOC() jumps to ERROR on failure, so no NULL checks are
   * needed afterwards.
   */
  length = strlen(sq->name);
  meta->seq_data[numseqs].name_length = length;
  ESL_ALLOC (meta->seq_data[numseqs].name, (1+length) * sizeof(char));

  length = strlen(sq->acc);
  meta->seq_data[numseqs].acc_length = length;
  ESL_ALLOC (meta->seq_data[numseqs].acc, (1+length) * sizeof(char));

  length = strlen(sq->source);
  meta->seq_data[numseqs].source_length = length;
  ESL_ALLOC (meta->seq_data[numseqs].source, (1+length) * sizeof(char));

  length = strlen(sq->desc);
  meta->seq_data[numseqs].desc_length = length;
  ESL_ALLOC (meta->seq_data[numseqs].desc, (1+length) * sizeof(char));

  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_checkptmx_GrowTo()
 * Synopsis:  Resize checkpointed DP matrix for new seq/model comparison.
 *
 * Purpose:   Given an existing checkpointed matrix structure <ox>,
 *            and the dimensions <M> and <L> of a new comparison,
 *            reallocate and reinitialize <ox>.
 *
 *            Essentially the same as free'ing the previous matrix and
 *            creating a new one -- but minimizes expensive memory
 *            allocation/reallocation calls.
 *
 *            Usually <ox> only grows. The exception is if <ox> is
 *            redlined (over its recommended allocation,
 *            <ox->ramlimit>) and the new problem size <M,L> can fit
 *            in the recommended allocation; then <ox> is reallocated
 *            down to the smaller recommended size.
 *
 * Args:      ox    - existing checkpointed matrix
 *            M     - new query profile length
 *            L     - new target sequence length
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> if an allocation fails. The state of <ox> is
 *            now undefined, and the caller should not use it.
 */
int
p7_checkptmx_GrowTo(P7_CHECKPTMX *ox, int M, int L)
{
  int     minR_chk      = (int) ceil(minimum_rows(L)) + ox->R0; /* minimum number of DP rows needed  */
  int     reset_dp_ptrs = FALSE;
  int     maxR;
  int64_t W;                    /* minimum row width needed, bytes */
  int     r;
  int     status;

  /* Validity of integer variable ranges may depend on design spec: */
  ESL_DASSERT1( (M <= 100000) );       /* design spec says, model length M <= 100000 */
  ESL_DASSERT1( (L <= 100000) );       /*           ... and,  seq length L <= 100000 */
  ESL_DASSERT1( (L >  0) );
  ESL_DASSERT1( (M >  0) );

  /* If we're debugging and we have stored copies of any matrices,
   * grow them too.  Must do this first, because we have an early exit
   * condition coming below.
   */
#ifdef p7_DEBUGGING
  if (ox->fwd && (status = p7_refmx_GrowTo(ox->fwd, M, L)) != eslOK) goto ERROR;
  if (ox->bck && (status = p7_refmx_GrowTo(ox->bck, M, L)) != eslOK) goto ERROR;
  if (ox->pp  && (status = p7_refmx_GrowTo(ox->pp,  M, L)) != eslOK) goto ERROR;
#endif

  /* Calculate W, the minimum row width needed, in bytes */
  W  = sizeof(float) * P7_NVF(M) * p7C_NSCELLS * p7_VNF;      /* vector part of row (MDI)     */
  W += ESL_UPROUND(sizeof(float) * p7C_NXCELLS, p7_VALIGN);   /* float part of row (specials); must maintain p7_VALIGN-byte alignment */

  /* Are current allocations satisfactory ? */
  if (W <= ox->allocW && ox->nalloc <= ox->ramlimit)
    {
      if      (L + ox->R0 <= ox->validR) { set_full        (ox, L);             return eslOK; }
      else if (minR_chk   <= ox->validR) { set_checkpointed(ox, L, ox->validR); return eslOK; }
    }

  /* Do individual matrix rows need to expand? */
  if ( W > ox->allocW)
    {
      ox->allocW    = W;
      ox->validR    = (int) (ox->nalloc / ox->allocW); /* validR must be <= allocR */
      reset_dp_ptrs = TRUE;
    }

  /* Does matrix dp_mem need reallocation, either up or down?
   * maxR is measured against the recommended allocation <ramlimit>,
   * not the current <nalloc>. Otherwise a redlined matrix would be
   * laid out against its own oversized allocation and never shrink.
   */
  maxR  = (int) (ox->ramlimit / ox->allocW);                    /* max rows if we use up to the recommended allocation size.      */
  if ( (ox->nalloc > ox->ramlimit && minR_chk <= maxR) ||       /* we were redlined, and recommended alloc will work: so downsize */
       minR_chk > ox->validR)                                   /* not enough memory for needed rows: so upsize                   */
    {
      set_row_layout(ox, L, maxR);
      ox->validR = ox->R0 + ox->Ra + ox->Rb + ox->Rc;   /* this may be > allocR now; we'll reallocate dp[] next, if so     */
      ox->nalloc = ox->validR * ox->allocW;
      ESL_REALLOC(ox->dp_mem, ox->nalloc + (p7_VALIGN-1)); /* (p7_VALIGN-1) because we will manually align dpf ptrs into dp_mem */
      reset_dp_ptrs = TRUE;
    }
  else  /* current validR will suffice, either full or checkpointed; we still need to calculate a layout */
    {
      if   (L+ox->R0 <= ox->validR) set_full(ox, L);
      else                          set_checkpointed(ox, L, ox->validR);
    }

  /* Does the array of row ptrs need reallocation? */
  if (ox->validR > ox->allocR)
    {
      ESL_REALLOC(ox->dpf, sizeof(float *) * ox->validR);
      ox->allocR    = ox->validR;
      reset_dp_ptrs = TRUE;
    }

  /* Do the row ptrs need to be reset? */
  if (reset_dp_ptrs)
    {
      ox->dpf[0] = (char *) ( ( (uintptr_t) ox->dp_mem + p7_VALIGN - 1) & p7_VALIMASK); /* vectors must be aligned on p7_VALIGN-byte boundary */
      for (r = 1; r < ox->validR; r++)
        ox->dpf[r] = ox->dpf[0] + (r * ox->allocW);
    }

  return eslOK;

 ERROR:
  return status;
}
/* Function:  p7_anchorhash_Store()
 * Synopsis:  Store a <P7_ANCHORS> array and get a key index for it.
 *
 * Purpose:   Try to store anchor set <anch> in hash table <ah>.
 *            Associate it with a unique key index, counting from
 *            0. This index lets us map the hashed data to
 *            integer-based C arrays. Return the index through
 *            <opt_index>.
 *
 *            <D0> allows us to store suffixes, supporting the
 *            segmental divide and conquer version of the MPAS
 *            algorithm. Do not store the first <D0> anchors; only
 *            store <D0+1..D>. To store the complete anchor set, pass
 *            <D0=0>.
 *
 *            If an identical anchor set is already stored in <ah>,
 *            set <*opt_index> to the key for that anchor set, and
 *            return <eslEDUP>.
 *
 *            Increment <ah->key_count[]> counter every time we call
 *            <_Store()> on a given anchorset suffix (not counting
 *            D0). This collects the observed frequency of sampling
 *            the anchorset suffix, which we can compare to its
 *            calculated probability.
 *
 *            Each stored key occupies <2*(D-D0)+1> integers in
 *            <ah->amem>: the count <D-D0>, followed by an <i0,k0>
 *            pair per anchor.
 *
 * Args:      ah        : hash table holding different anchor sets
 *            anch      : new anchor set to try to store
 *            D0        : ignore first <D0> anchors, store <D0+1..D> (0 = store all)
 *            opt_index : optRETURN: index of stored data
 *
 * Returns:   <eslOK> if <anch> is new; the anchor set data are stored,
 *            and <opt_index>, if requested, is set to the lookup
 *            key index for the stored data.
 *
 *            <eslEDUP> if this anchor set has already been stored before;
 *            <opt_index>, if requested, is set to the lookup key
 *            index of the previously stored data.
 *
 * Throws:    <eslEMEM> on allocation failure; <opt_index>, if
 *            requested, is set to -1.
 */
int
p7_anchorhash_Store(P7_ANCHORHASH *ah, const P7_ANCHORS *anch, int D0, int32_t *opt_index)
{
  uint32_t  val = anchorhash_function(anch->a + D0, anch->D - D0, ah->hashsize);
  int32_t  *ptr;
  int32_t   idx;
  int32_t   d;
  int       status;

  /* Was this key already stored? */
  for (idx = ah->hashtable[val]; idx != -1; idx = ah->nxt[idx])
    {
      if (anchorhash_compare(anch->a + D0, anch->D - D0, ah->amem + ah->key_offset[idx]) == eslOK)
        {
          ah->key_count[idx]++;
          if (opt_index) *opt_index = idx;
          return eslEDUP;
        }
    }

  /* Reallocate key memory if needed */
  if (ah->nkeys == ah->kalloc)
    {
      ESL_REALLOC(ah->key_offset, sizeof(int32_t) * ah->kalloc * 2);
      ESL_REALLOC(ah->key_count,  sizeof(int32_t) * ah->kalloc * 2);
      ESL_REALLOC(ah->nxt,        sizeof(int32_t) * ah->kalloc * 2);
      ah->kalloc *= 2;
    }

  /* Reallocate key data memory if needed (by doubling) */
  while (ah->an + 2 * (anch->D - D0) + 1 > ah->aalloc)
    {
      ESL_REALLOC(ah->amem, sizeof(int32_t) * ah->aalloc * 2);
      ah->aalloc *= 2;
    }

  /* Copy the key, assign its index */
  idx                 = ah->nkeys;
  ah->key_offset[idx] = ah->an;
  ah->key_count[idx]  = 1;       // Not ++. This is an initialization.
  ah->an             += 2 * (anch->D - D0) + 1;
  ah->nkeys++;

  ptr  = ah->amem + ah->key_offset[idx];
  *ptr = anch->D - D0;
  for (d = D0 + 1; d <= anch->D; d++)
    {
      ptr++; *ptr = anch->a[d].i0;
      ptr++; *ptr = anch->a[d].k0;
    }

  /* anchorhash needs to remember L,M so when caller asks
   * to _Get() an anchor set, anchorhash can set the sentinels
   * correctly. Fortunately even when we're only storing a
   * suffix of <anch>, we still get the whole <anch> object,
   * which has valid sentinels, so we can deduce from them
   * what L,M are.
   */
  if (ah->nkeys == 1) p7_anchor_GetSentinels(anch->a, anch->D, &(ah->L), &(ah->M));

  /* These are comparisons, not assignments: an '=' here would overwrite
   * the caller's sentinels in debug builds instead of checking them.
   */
  ESL_DASSERT1(( anch->a[anch->D+1].i0 == ah->L+1 ));
  ESL_DASSERT1(( anch->a[0].k0         == ah->M+1 ));

  /* Insert new element at head of the approp chain in hashtable */
  ah->nxt[idx]       = ah->hashtable[val];
  ah->hashtable[val] = idx;

  /* Time to upsize? If we're 3x saturated, expand the hash table */
  if (ah->nkeys > 3 * ah->hashsize)
    if ((status = anchorhash_upsize(ah)) != eslOK) goto ERROR;

  if (opt_index) *opt_index = idx;
  return eslOK;

 ERROR:
  if (opt_index) *opt_index = -1;
  return status;
}
/* p7_sparsemask_Finish_avx512()
 *
 * Finalize the AVX-512 side of sparse mask <sm> once all cells have
 * been added: reverse <kmem_AVX_512>, set the per-row <k_AVX_512[]>
 * pointers, count rows and segments, build the <seg_AVX_512[]>
 * coordinate pairs (with sentinels at both ends), reset the
 * <last_i>/<last_k> trackers, and mirror the AVX-512 fields into the
 * generic fields of <sm> for downstream code.
 *
 * Returns <eslOK> on success; <eslEMEM> if <seg_AVX_512> could not be
 * reallocated; <eslENORESULT> when built without HAVE_AVX512.
 */
int
p7_sparsemask_Finish_avx512(P7_SPARSEMASK *sm)
{
#ifdef HAVE_AVX512
  int *kptr;      /* walks through kmem, one row's worth of cells at a time */
  int  row;       /* sequence position, 1..L                                */
  int  g;         /* segment counter                                        */
  int  lane;      /* vector lane index                                      */
  int  status;    /* required by ESL_REALLOC                                */

  /* Reverse kmem in place. */
  esl_vec_IReverse(sm->kmem_AVX_512, sm->kmem_AVX_512, sm->ncells_AVX_512);

  /* Set the k[] pointers; count <S> and <nrow>. */
  kptr             = sm->kmem_AVX_512;
  sm->nrow_AVX_512 = 0;
  sm->S_AVX_512    = 0;
  for (row = 1; row <= sm->L; row++)
    {
      if (! sm->n_AVX_512[row])
        {
          sm->k_AVX_512[row] = NULL;
          continue;
        }

      sm->nrow_AVX_512++;
      sm->k_AVX_512[row] = kptr;
      kptr              += sm->n_AVX_512[row];
      if (sm->n_AVX_512[row-1] == 0) sm->S_AVX_512++;   /* an empty row above means a new segment starts here */
    }

  /* Grow seg[] if it can't hold S segments plus the two sentinels. */
  if (sm->S_AVX_512 + 2 > sm->salloc_AVX_512)
    {
      ESL_REALLOC(sm->seg_AVX_512, (sm->S_AVX_512+2) * sizeof(p7_sparsemask_seg_s));
      sm->salloc_AVX_512 = sm->S_AVX_512 + 2;           /* inclusive of sentinels */
      sm->n_srealloc++;
    }

  /* Set seg[] coord pairs; seg[0] and seg[S+1] are sentinels. */
  sm->seg_AVX_512[0].ib = -1;
  sm->seg_AVX_512[0].ia = -1;
  g = 1;
  for (row = 1; row <= sm->L; row++)
    {
      if (! sm->n_AVX_512[row]) continue;

      if (sm->n_AVX_512[row-1] == 0)
        sm->seg_AVX_512[g].ia = row;
      if (row == sm->L || sm->n_AVX_512[row+1] == 0)
        {
          sm->seg_AVX_512[g].ib = row;
          g++;
        }
    }
  ESL_DASSERT1(( g == sm->S_AVX_512+1 ));
  sm->seg_AVX_512[g].ib = sm->L+2;
  sm->seg_AVX_512[g].ia = sm->L+2;

  sm->last_i_AVX_512 = -1;
  for (lane = 0; lane < p7_VNF_AVX_512; lane++)
    sm->last_k_AVX_512[lane] = -1;

  /* If we're running AVX-512 code and not SSE, copy these values into
   * the SSE data structure fields so the downstream code will see them.
   */
  sm->seg    = sm->seg_AVX_512;
  sm->k      = sm->k_AVX_512;
  sm->n      = sm->n_AVX_512;
  sm->kmem   = sm->kmem_AVX_512;
  sm->S      = sm->S_AVX_512;
  sm->nrow   = sm->nrow_AVX_512;
  sm->ncells = sm->ncells_AVX_512;
  return eslOK;

 ERROR:
  return eslEMEM;
#else
  return eslENORESULT;
#endif
}
/* Function:  p7_hmmcache_Open()
 * Synopsis:  Cache a profile database.
 *
 * Purpose:   Open <hmmfile> and read all of its contents, creating
 *            a cached profile database in memory. Return a ptr to the
 *            cached profile database in <*ret_cache>.
 *
 *            Caller may optionally provide an <errbuf> ptr to
 *            at least <eslERRBUFSIZE> bytes, to capture an
 *            informative error message on failure.
 *
 * Args:      hmmfile   - (base) name of profile file to open
 *            ret_cache - RETURN: cached profile database
 *            errbuf    - optRETURN: error message for a failure
 *
 * Returns:   <eslOK> on success. <*ret_cache> points to the
 *            cached db. <errbuf> is unchanged.
 *
 *            Failure codes:
 *            <eslENOTFOUND> : <hmmfile> couldn't be opened for reading
 *            <eslEFORMAT>   : <hmmfile> isn't in recognized HMMER file format
 *            <eslEINCOMPAT> : profiles in <hmmfile> have different alphabets
 *
 *            On any failure, <*ret_cache> is <NULL> and <errbuf> contains
 *            an informative error message for the user.
 *
 * Throws:    <eslEMEM> : memory allocation error.
 */
int
p7_hmmcache_Open(char *hmmfile, P7_HMMCACHE **ret_cache, char *errbuf)
{
  P7_HMMCACHE *cache = NULL;
  P7_HMMFILE  *hfp   = NULL;
  P7_HMM      *hmm   = NULL;
  P7_BG       *bg    = NULL;
  P7_PROFILE  *gm    = NULL;
  P7_OPROFILE *om    = NULL;
  int          status;

  if (errbuf) errbuf[0] = '\0';

  ESL_ALLOC(cache, sizeof(P7_HMMCACHE));
  cache->name   = NULL;
  cache->abc    = NULL;
  cache->omlist = NULL;
  cache->gmlist = NULL;
  cache->lalloc = 4096;   /* allocation chunk size for <list> of ptrs */
  cache->n      = 0;

  /* Parenthesize the assignment so <status> holds esl_strdup()'s return code,
   * not the result of the != comparison.
   */
  if ( (status = esl_strdup(hmmfile, -1, &cache->name)) != eslOK) goto ERROR;
  ESL_ALLOC(cache->omlist, sizeof(P7_OPROFILE *) * cache->lalloc);
  ESL_ALLOC(cache->gmlist, sizeof(P7_PROFILE  *) * cache->lalloc);

  if ( (status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf)) != eslOK) goto ERROR;  // eslENOTFOUND | eslEFORMAT; <errbuf>

  while ((status = p7_hmmfile_Read(hfp, &(cache->abc), &hmm)) != eslEOF)              // eslEFORMAT | eslEINCOMPAT; <errbuf>
    {
      if (status != eslOK) ESL_XFAIL(status, errbuf, "%s", hfp->errbuf);

      /* The background model is created once, from the first profile's alphabet. */
      if (!bg && (bg = p7_bg_Create(cache->abc)) == NULL)          { status = eslEMEM; goto ERROR; }

      if ( (gm = p7_profile_Create(hmm->M, cache->abc)) == NULL)   { status = eslEMEM; goto ERROR; }
      if ( (status = p7_profile_Config(gm, hmm, bg)) != eslOK)     goto ERROR;

      if ( (status = p7_oprofile_ReadMSV (hfp, &(cache->abc), &om)) != eslOK ||  /* eslEFORMAT: hfp->errbuf | eslEINCOMPAT | eslEOF */
           (status = p7_oprofile_ReadRest(hfp, om))                != eslOK)    /* eslEFORMAT: hfp->errbuf */
        {
          if (status == eslEOF) ESL_XFAIL(eslEFORMAT, errbuf, "Premature EOF in vectorized profile files");
          else                  goto ERROR;
        }

      ESL_DASSERT1(( strcmp(gm->name, om->name) == 0 ));

      /* Grow both lists together by doubling. */
      if (cache->n >= cache->lalloc)
        {
          ESL_REALLOC(cache->gmlist, sizeof(P7_PROFILE  *) * cache->lalloc * 2);
          ESL_REALLOC(cache->omlist, sizeof(P7_OPROFILE *) * cache->lalloc * 2);
          cache->lalloc *= 2;
        }

      /* Ownership of <om>,<gm> passes to the cache; clear the locals so the
       * ERROR path can't free them a second time.
       */
      cache->omlist[cache->n] = om;
      cache->gmlist[cache->n] = gm;
      cache->n++;
      om = NULL;
      gm = NULL;

      /* Same reasoning for <hmm>: don't leave a dangling pointer for ERROR to free. */
      p7_hmm_Destroy(hmm);
      hmm = NULL;
    }

  p7_hmmfile_Close(hfp);
  p7_bg_Destroy(bg);
  *ret_cache = cache;
  return eslOK;

 ERROR:
  if (cache) p7_hmmcache_Close(cache);
  if (om)    p7_oprofile_Destroy(om);
  if (gm)    p7_profile_Destroy(gm);
  if (hmm)   p7_hmm_Destroy(hmm);
  if (bg)    p7_bg_Destroy(bg);
  if (hfp)   p7_hmmfile_Close(hfp);
  *ret_cache = NULL;   /* documented contract: NULL on any failure */
  return status;
}
/* Function:  esl_msafile_psiblast_Read()
 * Synopsis:  Read an alignment in PSI-BLAST's input format.
 *
 * Purpose:   Read an MSA from an open <ESLX_MSAFILE> <afp>, parsing for
 *            PSI-BLAST input format, starting from the current point.
 *            Create a new multiple alignment, and return a ptr to
 *            that alignment via <*ret_msa>. Caller is responsible for
 *            free'ing this <ESL_MSA>.
 *
 *            The <msa> has a reference line (<msa->rf[]>) that
 *            corresponds to the uppercase/lowercase columns in the
 *            alignment: consensus (uppercase) columns are marked 'x',
 *            and insert (lowercase) columns are marked '.' in this RF
 *            line.
 *
 * Args:      afp     - open <ESL_MSAFILE>
 *            ret_msa - RETURN: newly parsed <ESL_MSA>
 *
 * Returns:   <eslOK> on success. <*ret_msa> contains the newly
 *            allocated MSA. <afp> is at EOF.
 *
 *            <eslEOF> if no (more) alignment data are found in
 *            <afp>, and <afp> is returned at EOF.
 *
 *            <eslEFORMAT> on a parse error. <*ret_msa> is set to
 *            <NULL>. <afp> contains information sufficient for
 *            constructing useful diagnostic output:
 *            | <afp->errmsg>       | user-directed error message     |
 *            | <afp->linenumber>   | line # where error was detected |
 *            | <afp->line>         | offending line (not NUL-term)   |
 *            | <afp->n>            | length of offending line        |
 *            | <afp->bf->filename> | name of the file                |
 *            and <afp> is poised at the start of the following line,
 *            so (in principle) the caller could try to resume
 *            parsing.
 *
 * Throws:    <eslEMEM> on allocation error.
 *            <eslESYS> if a system call fails, such as fread().
 *            <eslEINCONCEIVABLE> - "impossible" corruption
 *            On these, <*ret_msa> is returned <NULL>, and the state of
 *            <afp> is undefined.
 */
int
esl_msafile_psiblast_Read(ESLX_MSAFILE *afp, ESL_MSA **ret_msa)
{
  ESL_MSA  *msa      = NULL;
  int       idx      = 0;    /* counter over sequences in a block */
  int       nblocks  = 0;    /* counter over blocks               */
  int64_t   alen     = 0;
  int       nseq     = 0;
  int64_t   cur_alen;
  esl_pos_t pos;             /* position on a line                */
  esl_pos_t name_start, name_len;
  esl_pos_t seq_start,  seq_len;
  esl_pos_t block_seq_start = 0;   /* set from the first line (idx==0) of every block */
  esl_pos_t block_seq_len   = 0;
  int       status;

  ESL_DASSERT1( (afp->format == eslMSAFILE_PSIBLAST) );

  afp->errmsg[0] = '\0';

  /* allocate a growable MSA. We set msa->{nseq,alen} only when we're done. */
#ifdef eslAUGMENT_ALPHABET
  if (afp->abc   &&  (msa = esl_msa_CreateDigital(afp->abc, 16, -1)) == NULL) { status = eslEMEM; goto ERROR; }
#endif
  if (! afp->abc &&  (msa = esl_msa_Create(                 16, -1)) == NULL) { status = eslEMEM; goto ERROR; }

  /* skip leading blank lines in file */
  while ( (status = eslx_msafile_GetLine(afp, NULL, NULL)) == eslOK && esl_memspn(afp->line, afp->n, " \t") == afp->n) ;
  if (status != eslOK) goto ERROR; /* includes normal EOF */

  /* Read the file a line at a time; if a parsing error occurs, detect immediately, with afp->linenumber set correctly */
  do { /* while in the file... */
    idx = 0;
    do { /* while in a block... */
      /* Locate the name field and the sequence field on the line.
       * <ctype.h> functions need an unsigned char value, so cast each byte.
       */
      for (pos = 0;     pos < afp->n; pos++) if (! isspace((unsigned char) afp->line[pos])) break;
      name_start = pos;
      for (pos = pos+1; pos < afp->n; pos++) if (  isspace((unsigned char) afp->line[pos])) break;
      name_len   = pos - name_start;
      for (pos = pos+1; pos < afp->n; pos++) if (! isspace((unsigned char) afp->line[pos])) break;
      seq_start  = pos;
      if (pos >= afp->n) ESL_XFAIL(eslEFORMAT, afp->errmsg, "invalid alignment line");
      for (pos = afp->n-1; pos > 0; pos--)   if (! isspace((unsigned char) afp->line[pos])) break;
      seq_len    = pos - seq_start + 1;

      /* Every line of a block must start and end its sequence in the same columns. */
      if (idx == 0) {
        block_seq_start = seq_start;
        block_seq_len   = seq_len;
      } else {
        if (seq_start != block_seq_start) ESL_XFAIL(eslEFORMAT, afp->errmsg, "sequence start is misaligned");
        if (seq_len   != block_seq_len)   ESL_XFAIL(eslEFORMAT, afp->errmsg, "sequence end is misaligned");
      }

      /* Process the consensus #=RF line. */
      if (idx == 0) {
        ESL_REALLOC(msa->rf, sizeof(char) * (alen + seq_len + 1));
        for (pos = 0; pos < seq_len; pos++) msa->rf[alen+pos] = '-'; /* anything neutral other than . or x will do. */
        msa->rf[alen+pos] = '\0';
      }
      for (pos = 0; pos < seq_len; pos++)
        {
          unsigned char c = (unsigned char) afp->line[seq_start+pos];

          if (c == '-') continue;
          if (isupper(c)) {
            if (msa->rf[alen+pos] == '.') ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected upper case residue (#%d on line)", (int) pos+1);
            msa->rf[alen+pos] = 'x';
          }
          if (islower(c)) {
            if (msa->rf[alen+pos] == 'x') ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected lower case residue (#%d on line)", (int) pos+1);
            msa->rf[alen+pos] = '.';
          }
        }

      /* Store the sequence name. */
      if (nblocks == 0) {
        /* make sure we have room for another sequence */
        if (idx >= msa->sqalloc &&  (status = esl_msa_Expand(msa))                   != eslOK) goto ERROR;
        if ( (status = esl_msa_SetSeqName(msa, idx, afp->line+name_start, name_len)) != eslOK) goto ERROR;
      } else {
        /* A later block with more lines than the first one would index past
         * the sequences we've stored; reject it before touching sqname/aseq/ax.
         */
        if (idx >= nseq)
          ESL_XFAIL(eslEFORMAT, afp->errmsg, "block contains more seqs than earlier blocks");
        if (! esl_memstrcmp(afp->line+name_start, name_len, msa->sqname[idx]))
          ESL_XFAIL(eslEFORMAT, afp->errmsg, "expected sequence %s on this line, but saw %.*s", msa->sqname[idx], (int) name_len, afp->line+name_start);
      }

      /* Append the sequence. */
      cur_alen = alen;
#ifdef eslAUGMENT_ALPHABET
      if (msa->abc)    { status = esl_abc_dsqcat(afp->inmap, &(msa->ax[idx]),   &(cur_alen), afp->line+seq_start, seq_len); }
#endif
      if (! msa->abc)  { status = esl_strmapcat (afp->inmap, &(msa->aseq[idx]), &(cur_alen), afp->line+seq_start, seq_len); }
      if      (status == eslEINVAL)    ESL_XFAIL(eslEFORMAT, afp->errmsg, "one or more invalid sequence characters");
      else if (status != eslOK)        goto ERROR;
      if (cur_alen - alen != seq_len)  ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected number of seq characters");

      /* get next line. if it's blank, or if we're EOF, we're done with the block */
      idx++;
      status = eslx_msafile_GetLine(afp, NULL, NULL);
    } while (status == eslOK && esl_memspn(afp->line, afp->n, " \t") < afp->n); /* blank line ends a block. */
    if (status != eslOK && status != eslEOF) goto ERROR;
    /* End of one block */

    if      (nblocks == 0) nseq = idx;
    else if (idx != nseq)  ESL_XFAIL(eslEFORMAT, afp->errmsg, "last block didn't contain same # of seqs as earlier blocks");
    alen += block_seq_len;
    nblocks++;

    /* skip blank lines to start of next block, if any */
    while ( (status = eslx_msafile_GetLine(afp, NULL, NULL)) == eslOK && esl_memspn(afp->line, afp->n, " \t") == afp->n) ;
  } while (status == eslOK);
  if (status != eslEOF) goto ERROR;

  msa->nseq = nseq;
  msa->alen = alen;
  if (( status = esl_msa_SetDefaultWeights(msa)) != eslOK) goto ERROR;
  *ret_msa  = msa;
  return eslOK;

 ERROR:
  if (msa) esl_msa_Destroy(msa);
  *ret_msa = NULL;
  return status;
}
/* Function:  esl_msafile_a2m_Read()
 * Synopsis:  Read a UCSC A2M format alignment.
 *
 * Purpose:   Read an MSA from an open <ESL_MSAFILE> <afp>, parsing
 *            for UCSC A2M (SAM) format. Create a new MSA,
 *            and return a ptr to it in <*ret_msa>. Caller is responsible
 *            for freeing this <ESL_MSA>.
 *
 *            The <msa> has a reference line (<msa->rf[]>) that
 *            corresponds to the uppercase/lowercase columns in the
 *            alignment: consensus (uppercase) columns are marked 'X',
 *            and insert (lowercase) columns are marked '.' in the RF
 *            annotation line.
 *
 *            This input parser can deal both with "dotless" A2M, and
 *            full A2M format with dots.
 *
 * Args:      afp     - open <ESL_MSAFILE>
 *            ret_msa - RETURN: newly parsed <ESL_MSA>
 *
 * Returns:   <eslOK> on success. <*ret_msa> is set to the newly
 *            allocated MSA, and <afp> is at EOF.
 *
 *            <eslEOF> if no (more) alignment data are found in
 *            <afp>, and <afp> is returned at EOF.
 *
 *            <eslEFORMAT> on a parse error, including a record that
 *            has a name line but no sequence data. <*ret_msa> is set
 *            to <NULL>. <afp> contains information sufficient for
 *            constructing useful diagnostic output:
 *            | <afp->errmsg>       | user-directed error message     |
 *            | <afp->linenumber>   | line # where error was detected |
 *            | <afp->line>         | offending line (not NUL-term)   |
 *            | <afp->n>            | length of offending line        |
 *            | <afp->bf->filename> | name of the file                |
 *            and <afp> is poised at the start of the following line,
 *            so (in principle) the caller could try to resume
 *            parsing.
 *
 * Throws:    <eslEMEM> - an allocation failed.
 *            <eslESYS> - a system call such as fread() failed
 *            <eslEINCONCEIVABLE> - "impossible" corruption
 *            On these, <*ret_msa> is returned <NULL>, and the state of
 *            <afp> is undefined.
 */
int
esl_msafile_a2m_Read(ESL_MSAFILE *afp, ESL_MSA **ret_msa)
{
  ESL_MSA  *msa        = NULL;
  char    **csflag     = NULL;  /* csflag[i][pos] is TRUE if aseq[i][pos] was uppercase consensus            */
  int       csalloc    = 0;     /* number of rows allocated in csflag[]; rows are NULL until first used     */
  int      *nins       = NULL;  /* # of inserted residues before each consensus col [0..ncons-1]            */
  int      *this_nins  = NULL;  /* # of inserted residues before each consensus residue in this seq         */
  int       nseq       = 0;
  int       ncons      = 0;
  int       idx;
  int64_t   thislen;
  int64_t   spos;
  int       this_ncons;
  int       cpos, bpos;
  char     *p, *tok;
  esl_pos_t n,  toklen;
  int       status;

  ESL_DASSERT1( (afp->format == eslMSAFILE_A2M) );

  afp->errmsg[0] = '\0';

#ifdef eslAUGMENT_ALPHABET
  if (afp->abc   &&  (msa = esl_msa_CreateDigital(afp->abc, 16, -1)) == NULL) { status = eslEMEM; goto ERROR; }
#endif
  if (! afp->abc &&  (msa = esl_msa_Create(                 16, -1)) == NULL) { status = eslEMEM; goto ERROR; }

  /* csflag[] is kept the same size as the MSA's sequence allocation. Its
   * size is tracked separately in <csalloc> so cleanup never depends on <msa>.
   */
  ESL_ALLOC(csflag, sizeof(char *) * msa->sqalloc);
  csalloc = msa->sqalloc;
  for (idx = 0; idx < csalloc; idx++) csflag[idx] = NULL;

  /* skip leading blank lines in file */
  while ( (status = esl_msafile_GetLine(afp, &p, &n)) == eslOK && esl_memspn(afp->line, afp->n, " \t") == afp->n) ;
  if (status != eslOK) goto ERROR; /* includes normal EOF */

  /* tolerate sloppy space at start of name/desc line */
  while (n && isspace((unsigned char) *p)) { p++; n--; }
  if (n == 0 || *p != '>') ESL_XFAIL(eslEFORMAT, afp->errmsg, "expected A2M name/desc line starting with >");

  do { /* for each record starting in '>': */
    p++; n--;   /* advance past > */

    if ( (status = esl_memtok(&p, &n, " \t", &tok, &toklen)) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "no name found for A2M record");
    if (nseq >= msa->sqalloc) {
      if ( (status = esl_msa_Expand(msa)) != eslOK) goto ERROR;
      ESL_REALLOC(csflag, sizeof(char *) * msa->sqalloc);
      for (idx = csalloc; idx < msa->sqalloc; idx++) csflag[idx] = NULL;
      csalloc = msa->sqalloc;
    }

    if (     (status = esl_msa_SetSeqName       (msa, nseq, tok, toklen)) != eslOK) goto ERROR;
    if (n && (status = esl_msa_SetSeqDescription(msa, nseq, p,   n))      != eslOK) goto ERROR;

    /* now for each sequence line... */
    thislen    = 0;   /* count of lowercase, uppercase, and '-': w/o dots, on first pass */
    this_ncons = 0;   /* count of uppercase + '-': number of consensus columns in alignment: must match for all seqs */
    if (nseq)
      {
        /* <this_nins> was allocated while reading the first seq (nseq==0); just reset it. */
        for (cpos = 0; cpos <= ncons; cpos++)
          this_nins[cpos] = 0;
      }

    while ( (status = esl_msafile_GetLine(afp, &p, &n)) == eslOK)
      {
        while (n && isspace((unsigned char) *p)) { p++; n--; }   /* tolerate and skip leading whitespace on line */
        if (n  == 0)   continue;                                  /* tolerate and skip blank lines */
        if (*p == '>') break;

        ESL_REALLOC(csflag[nseq], sizeof(char) * (thislen + n + 1)); /* might be an overalloc by a bit, depending on whitespace on line */
        if (nseq == 0) {
          /* Only the first sequence sizes <this_nins>; later ones must fit in ncons+1 slots. */
          ESL_REALLOC(this_nins, sizeof(int) * (this_ncons + n + 1));
          for (cpos = this_ncons; cpos <= this_ncons+n; cpos++)
            this_nins[cpos] = 0;
        }

        for (spos = thislen, bpos = 0; bpos < n; bpos++)
          {
            unsigned char c = (unsigned char) p[bpos];

            if      (c == 'O')   continue;
            else if (isupper(c)) { csflag[nseq][spos++] = TRUE;  this_ncons++;            }
            else if (islower(c)) { csflag[nseq][spos++] = FALSE; this_nins[this_ncons]++; }
            else if (c == '-')   { csflag[nseq][spos++] = TRUE;  this_ncons++;            }

            /* Test on <nseq>, not <ncons>: when the first seq had zero consensus
             * columns, <this_nins> has room for this_nins[0] only and must not be overrun.
             */
            if (nseq && this_ncons > ncons) ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected # of consensus residues, didn't match previous seq(s)");
          }
        csflag[nseq][spos] = TRUE; /* need a sentinel, because of the way the padding functions work */

#ifdef eslAUGMENT_ALPHABET
        if (msa->abc)   { status = esl_abc_dsqcat(afp->inmap, &(msa->ax[nseq]),   &thislen, p, n); }
#endif
        if (! msa->abc) { status = esl_strmapcat (afp->inmap, &(msa->aseq[nseq]), &thislen, p, n); }
        if (status == eslEINVAL) ESL_XFAIL(eslEFORMAT, afp->errmsg, "one or more invalid sequence characters");
        else if (status != eslOK) goto ERROR;
        ESL_DASSERT1( (spos == thislen) );
      }
    if (status != eslOK && status != eslEOF) goto ERROR; /* exception thrown by esl_msafile_GetLine() */
    /* status == OK: then *p == '>'. status == eslEOF: we're eof.  status == anything else: error */

    /* A name line followed by no sequence lines leaves csflag[nseq] (and, for
     * the first record, this_nins) unallocated; the code below would dereference them.
     */
    if (csflag[nseq] == NULL) ESL_XFAIL(eslEFORMAT, afp->errmsg, "no sequence data found for A2M record");

    /* Finished reading a sequence record. */
    if (nseq == 0)
      {
        ncons = this_ncons;
        ESL_ALLOC(nins, sizeof(int) * (ncons+1));
        for (cpos = 0; cpos <= ncons; cpos++)
          nins[cpos] = this_nins[cpos];
      }
    else
      {
        if (this_ncons != ncons) ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected # of consensus residues, didn't match previous seq(s)");
        for (cpos = 0; cpos <= ncons; cpos++)
          nins[cpos] = ESL_MAX(nins[cpos], this_nins[cpos]);
      }
    nseq++;
  } while (status == eslOK);

  /* Now we have nseq *unaligned* sequences in ax/aseq[0..nseq-1]; call the length slen, though we don't explicitly store it
   * csflag[idx][spos] tells us whether each unaligned residue is an insertion or consensus, for spos==0..slen-1.
   * nins[0..ncons] tells us the max number of inserted residues before each consensus column
   * This is sufficient information to reconstruct each aligned sequence.
   */
  msa->nseq = nseq;
#ifdef eslAUGMENT_ALPHABET
  if (msa->abc)  { if ((status = a2m_padding_digital(msa, csflag, nins, ncons)) != eslOK) goto ERROR; }
#endif
  if (!msa->abc) { if ((status = a2m_padding_text   (msa, csflag, nins, ncons)) != eslOK) goto ERROR; }

  if (( status = esl_msa_SetDefaultWeights(msa)) != eslOK) goto ERROR;

  *ret_msa = msa;
  free(nins);
  free(this_nins);
  for (idx = 0; idx < csalloc; idx++) free(csflag[idx]);
  free(csflag);
  return eslOK;

 ERROR:
  free(nins);
  free(this_nins);
  if (csflag) {
    /* Free every allocated row; <msa->nseq> is not usable here because it is
     * only assigned once all records have been read.
     */
    for (idx = 0; idx < csalloc; idx++) free(csflag[idx]);
    free(csflag);
  }
  if (msa) esl_msa_Destroy(msa);
  *ret_msa = NULL;   /* documented contract: NULL on any failure */
  return status;
}
/* Function:  p7_coords2_hash_Store()
 * Synopsis:  Store a <P7_COORDS2> array and get a key index for it.
 *
 * Purpose:   Store the coordinate-pair array <c2> in hash table <ch>
 *            and assign it a unique key index, counting from 0, so
 *            callers can keep per-key data in ordinary integer-indexed
 *            C arrays. The index is returned through <opt_index>.
 *
 *            If an identical array is already in the table, nothing
 *            new is stored; <*opt_index> is set to the index of the
 *            existing copy and <eslEDUP> is returned.
 *
 *            Each stored key occupies <2*n+1> integers in <ch->cmem>:
 *            the pair count <n>, followed by <n1>,<n2> of every pair.
 *
 * Args:      ch        : hash table holding different arrays of coord pairs
 *            c2        : new array of coord pairs to try to store
 *            opt_index : optRETURN: index of stored data
 *
 * Returns:   <eslOK> if <c2> is new; the data are stored, and
 *            <opt_index>, if requested, is set to the lookup key
 *            index for the stored data.
 *
 *            <eslEDUP> if <c2> has already been stored before;
 *            <opt_index>, if requested, is set to the lookup key
 *            index of the previously stored data.
 *
 * Throws:    <eslEMEM> on allocation failure; <opt_index>, if
 *            requested, is set to -1.
 */
int
p7_coords2_hash_Store(P7_COORDS2_HASH *ch, const P7_COORDS2 *c2, int32_t *opt_index)
{
  uint32_t  bucket = p7_coords2_hash_function(c2->arr, c2->n, ch->hashsize);
  int32_t  *rec;      /* start of this key's record in ch->cmem */
  int32_t   key;      /* key index: chain cursor, then the new key */
  int32_t   pair;     /* counter over coord pairs in <c2> */
  int       status;

  /* Look down the bucket's chain for an identical, already stored key. */
  key = ch->hashtable[bucket];
  while (key != -1)
    {
      if (p7_coords2_hash_compare(c2->arr, c2->n, ch->cmem + ch->key_offset[key]) == eslOK)
        {
          if (opt_index) *opt_index = key;
          return eslEDUP;
        }
      key = ch->nxt[key];
    }

  /* Out of key slots? Double the per-key arrays. */
  if (ch->nkeys == ch->kalloc)
    {
      ESL_REALLOC(ch->key_offset, sizeof(int32_t) * ch->kalloc * 2);
      ESL_REALLOC(ch->nxt,        sizeof(int32_t) * ch->kalloc * 2);
      ch->kalloc *= 2;
    }

  /* Keep doubling the raw data memory until the new record fits. */
  while (ch->cn + 2 * c2->n + 1 > ch->calloc)
    {
      ESL_REALLOC(ch->cmem, sizeof(int32_t) * ch->calloc * 2);
      ch->calloc *= 2;
    }

  /* Claim the next key index and reserve its record at the end of cmem. */
  key                 = ch->nkeys++;
  ch->key_offset[key] = ch->cn;
  ch->cn             += 2 * c2->n + 1;

  /* Write the record: pair count first, then n1,n2 for each pair. */
  rec    = ch->cmem + ch->key_offset[key];
  rec[0] = c2->n;
  for (pair = 0; pair < c2->n; pair++)
    {
      rec[2*pair + 1] = c2->arr[pair].n1;
      rec[2*pair + 2] = c2->arr[pair].n2;
    }

  /* Push the new key onto the front of its bucket's chain. */
  ch->nxt[key]          = ch->hashtable[bucket];
  ch->hashtable[bucket] = key;

  /* More than 3 keys per bucket on average: grow the hash table. */
  if (ch->nkeys > 3 * ch->hashsize)
    {
      status = p7_coords2_hash_upsize(ch);
      if (status != eslOK) goto ERROR;
    }

  if (opt_index) *opt_index = key;
  return eslOK;

 ERROR:
  if (opt_index) *opt_index = -1;
  return status;
}
/* regurgitate_pfam_as_pfam()
 *
 * Given an open Pfam formatted msafile, read the next alignment and
 * regurgitate it, after modifying it as necessary (change dna to rna,
 * wussify SS, etc) in Pfam format.
 *
 * The alignment is processed one line at a time and written to <ofp>
 * as it is read. #=GC and #=GR lines are only parsed when one of
 * <wussify>, <dewuss>, <fullwuss> is set, in which case SS_cons (GC)
 * and SS (GR) annotation is converted; all other '#' lines are echoed
 * unchanged. Sequence lines have the requested symbol conversions
 * applied to the aligned text.
 *
 * Returns <eslOK> on success.
 * Returns <eslEOF> if there are no more alignments in <afp>.
 * Returns <eslEFORMAT> if parse fails because of a file format
 * problem, in which case afp->errmsg is set to contain a formatted
 * message that indicates the cause of the problem.
 * Some failures (unreadable line, missing Stockholm header, missing
 * terminating "//", bad WUSS annotation) are fatal via esl_fatal().
 */
static int
regurgitate_pfam_as_pfam(ESLX_MSAFILE *afp, FILE *ofp, char *gapsym, int force_lower, int force_upper,
                         int force_rna, int force_dna, int iupac_to_n, int x_is_bad, int wussify,
                         int dewuss, int fullwuss, char *rfrom, char *rto)
{
  char      *p;
  esl_pos_t  n;
  char      *first_seqname = NULL;
  char      *gx            = NULL;
  char      *seqname       = NULL;
  char      *tag           = NULL;
  char      *text          = NULL;
  esl_pos_t  gxlen, namelen, taglen, textlen;
  int        nseq_read     = 0;
  int        parse_gc_and_gr;
  int        flushpoint    = 10000;
  int        exp_alen      = -1;
  char      *buf           = NULL;
  esl_pos_t  pos, pos2;
  int        status;

  parse_gc_and_gr = (wussify || dewuss || fullwuss) ? TRUE : FALSE; /* should we parse out GR/GC lines and check if they're SS lines? */
  afp->errmsg[0] = '\0';

  /* Check the magic Stockholm header line.
   * We have to skip blank lines here, else we perceive
   * trailing blank lines in a file as a format error when
   * reading in multi-record mode. Every line read here is echoed.
   */
  do {
    status = eslx_msafile_GetLine(afp, &p, &n);
    if      (status == eslEOF) return eslEOF;
    else if (status != eslOK)  esl_fatal("small mem parse error. problem reading line %d of msafile", (int) afp->linenumber);
    fprintf(ofp, "%.*s\n", (int) afp->n, afp->line);
  } while (esl_memspn(afp->line, afp->n, " \t") == afp->n  ||              /* skip blank lines             */
           (esl_memstrpfx(afp->line, afp->n, "#")                           /* and skip comment lines       */
            && ! esl_memstrpfx(afp->line, afp->n, "# STOCKHOLM")));         /* but stop on Stockholm header */

  if (! esl_memstrpfx(afp->line, afp->n, "# STOCKHOLM 1."))
    esl_fatal("small mem parse failed (line %d): missing \"# STOCKHOLM\" header", (int) afp->linenumber);

  /* Read the alignment file one line at a time. */
  while ((status = eslx_msafile_GetLine(afp, &p, &n)) == eslOK)
    {
      if ((int) afp->linenumber % flushpoint == 0) fflush(ofp);
      while (n && ( *p == ' ' || *p == '\t')) { p++; n--; } /* skip leading whitespace */

      if      (!n)                        fprintf(ofp, "\n");
      else if (esl_memstrpfx(p, n, "//")) { fprintf(ofp, "//\n"); break; } /* normal way out */
      else if (*p == '#')
        {
          if (parse_gc_and_gr && esl_memstrpfx(p, n, "#=GC"))
            {
              /* parse line into temporary strings */
              if (esl_memtok(&p, &n, " \t", &gx,   &gxlen)   != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "small mem parse failed (line %d): bad #=GC line", (int) afp->linenumber);
              if (esl_memtok(&p, &n, " \t", &tag,  &taglen)  != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "small mem parse failed (line %d): bad #=GC line", (int) afp->linenumber);
              if (esl_memtok(&p, &n, " \t", &text, &textlen) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "small mem parse failed (line %d): bad #=GC line", (int) afp->linenumber);
              pos = text - afp->line; /* pos: position of first aligned char on line; total width of annotation tag w/spaces */

              /* verify alignment length */
              if      (exp_alen == -1)      exp_alen = textlen;
              else if (exp_alen != textlen) ESL_XFAIL(eslEFORMAT, afp->errmsg, "small mem parse failed (line %d): bad #=GC line, len %d, expected %d", (int) afp->linenumber, (int) textlen, (int) exp_alen);

              /* we need to make a writable string copy of the annotation, to edit it */
              ESL_REALLOC(buf, sizeof(char) * (textlen+1));
              esl_memstrcpy(text, textlen, buf);

              if (esl_memstrcmp(tag, taglen, "SS_cons"))
                {
                  if      (wussify) esl_kh2wuss(buf, buf);
                  else if (dewuss)  esl_wuss2kh(buf, buf);
                  else if (fullwuss)
                    {
                      status = esl_wuss_full(buf, buf);
                      if      (status == eslESYNTAX) esl_fatal("Bad SS_cons line: not in WUSS format, alifile line: %d", (int) afp->linenumber);
                      else if (status != eslOK)      esl_fatal("Conversion of SS_cons line failed, code %d, alifile line: %d", status, (int) afp->linenumber);
                    }
                }
              fprintf(ofp, "#=GC %.*s%*s%s\n", (int) taglen, tag, (int) (pos-taglen-5), "", buf);
            }
          else if (parse_gc_and_gr && esl_memstrpfx(p, n, "#=GR"))   /* esl_memstrpfx() is TRUE on a match */
            {
              /* parse line into temporary strings */
              if (esl_memtok(&p, &n, " \t", &gx,      &gxlen)   != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "--small parse failed (line %d): bad #=GR line", (int) afp->linenumber);
              if (esl_memtok(&p, &n, " \t", &seqname, &namelen) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "--small parse failed (line %d): bad #=GR line", (int) afp->linenumber);
              if (esl_memtok(&p, &n, " \t", &tag,     &taglen)  != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "--small parse failed (line %d): bad #=GR line", (int) afp->linenumber);
              pos = tag - afp->line;
              if (esl_memtok(&p, &n, " \t", &text,    &textlen) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "--small parse failed (line %d): bad #=GR line", (int) afp->linenumber);
              pos2 = text - afp->line;

              /* we need to make a writable string copy of the annotation, to edit it */
              ESL_REALLOC(buf, sizeof(char) * (textlen+1));
              esl_memstrcpy(text, textlen, buf);

              /* verify alignment length */
              if      (exp_alen == -1)      exp_alen = textlen;
              else if (exp_alen != textlen) ESL_XFAIL(eslEFORMAT, afp->errmsg, "small mem parse failed (line %d): bad #=GR line, len %d, expected %d", (int) afp->linenumber, (int) textlen, (int) exp_alen);

              if (esl_memstrcmp(tag, taglen, "SS"))   /* esl_memstrcmp() is TRUE when equal */
                {
                  if      (wussify) esl_kh2wuss(buf, buf);
                  else if (dewuss)  esl_wuss2kh(buf, buf);
                  else if (fullwuss)
                    {
                      status = esl_wuss_full(buf, buf);
                      if      (status == eslESYNTAX) esl_fatal("Bad SS line: not in WUSS format, alifile line: %d", (int) afp->linenumber);
                      else if (status != eslOK)      esl_fatal("Conversion of SS line failed, code %d, alifile line: %d", status, (int) afp->linenumber);
                    }
                }

              fprintf(ofp, "#=GR %.*s%*s%.*s%*s%s\n", (int) namelen, seqname, (int) (pos-namelen-5), "", (int) taglen, tag, (int) (pos2-pos-taglen), "", buf);
            }
          else
            { /* '#' prefixed line that is not #=GC/#=GR (or it is, and wussify,dewuss,fullwuss are all FALSE) */
              fprintf(ofp, "%.*s\n", (int) afp->n, afp->line); /* print the line */
            }
        } /* end of 'if (*p == '#')' */
      else
        { /* sequence line */
          if (esl_memtok(&p, &n, " \t", &seqname, &namelen) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "--small parse failed (line %d): bad sequence line", (int) afp->linenumber);
          if (esl_memtok(&p, &n, " \t", &text,    &textlen) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "--small parse failed (line %d): bad sequence line", (int) afp->linenumber);
          pos = text - afp->line;

          /* verify alignment length */
          if      (exp_alen == -1)      exp_alen = textlen;
          else if (exp_alen != textlen) ESL_XFAIL(eslEFORMAT, afp->errmsg, "small mem parse failed (line %d): bad seq line, len %d, expected %d", (int) afp->linenumber, (int) textlen, (int) exp_alen);

          /* make sure we haven't just read a second line of the first sequence in file (we must be in Pfam 1 line/seq file) */
          if (nseq_read == 0)
            {
              if ((status = esl_memstrdup(seqname, namelen, &(first_seqname))) != eslOK) goto ERROR;
            }
          else if (esl_memstrcmp(seqname, namelen, first_seqname))
            {
              /* <seqname> points into the line and isn't NUL-terminated; report the
               * (identical) NUL-terminated copy instead.
               */
              ESL_XFAIL(eslEFORMAT, afp->errmsg, "parse failed (line %d): two seqs named %s. Alignment appears to be in Stockholm format. Reformat to Pfam with esl-reformat.", (int) afp->linenumber, first_seqname);
            }
          nseq_read++;

          /* we need to make a writable string copy of the annotation, to edit it */
          ESL_REALLOC(buf, sizeof(char) * (textlen+1));
          esl_memstrcpy(text, textlen, buf);

          /* make adjustments as necessary */
          if (rfrom)       symconvert(buf, rfrom, rto);
          if (gapsym)      symconvert(buf, "-_.", gapsym);
          if (force_lower) symconvert(buf, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz");
          if (force_upper) symconvert(buf, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
          if (force_rna)   symconvert(buf, "Tt", "Uu");
          if (force_dna)   symconvert(buf, "Uu", "Tt");
          if (iupac_to_n)  symconvert(buf, "RYMKSWHBVDrymkswhbvd", "NNNNNNNNNNnnnnnnnnnn");
          if (x_is_bad)    symconvert(buf, "Xx", "Nn");

          /* print it out */
          fprintf(ofp, "%.*s%*s%s\n", (int) namelen, seqname, (int) (pos-namelen), "", buf);
        }
    }

  /* If we saw a normal // end, we would've successfully read a line,
   * so when we get here, status (from the line read) should be eslOK.
   */
  if (status != eslOK) esl_fatal("--small parse failed (line %d): didn't find // at end of alignment", (int) afp->linenumber);

  free(first_seqname);
  free(buf);
  return eslOK;

 ERROR:
  /* Release the temporaries on the failure path too. */
  free(first_seqname);
  free(buf);
  return status;
}