/* Function:  esl_hxp_FitGuess()
 *
 * Purpose:   Given a sorted vector of <n> observed data samples <x[]>,
 *            from smallest <x[0]> to largest <x[n-1]>, calculate a
 *            very crude guesstimate of a fit -- suitable only as a starting
 *            point for further optimization -- and return those parameters
 *            in <h>.
 *
 *            Assigns $q_k \propto \frac{1}{k}$ and $\mu = \min_i x_i$;
 *            splits $x$ into $K$ roughly equal-sized bins, and
 *            assigns $\lambda_k$ as the ML estimate from bin $k$.
 *            (If $q_k$ coefficients have already been fixed to
 *            known values, this step is skipped.)
 */
int
esl_hxp_FitGuess(double *x, int n, ESL_HYPEREXP *h)
{
  double tmu;   /* current mu */
  double mean;  /* mean (x-tmu) in a bin */
  int    i,k;
  int    imin, imax;

  h->mu = x[0];  /* minimum */
  for (k = 0; k < h->K; k++)
    {
      if (! h->fixmix)
        h->q[k] = 1 / (double)(k+1); /* priors ~ 1, 1/2, 1/3... */

      imin = (int) ((double)(k*n)/(double)h->K);
      imax = (int) ((double)((k+1)*n)/(double)h->K);

      tmu  = x[imin];
      mean = 0.;
      for (i = imin; i < imax; i++)
        mean += x[i] - tmu;
      mean /= (double)(imax-imin);

      h->lambda[k] = 1 / mean;
    }
  esl_vec_DNorm(h->q, h->K);
  return eslOK;
}
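/* Usage sketch (editor's addition, not part of the original source):
 * <esl_hxp_FitGuess()> only seeds an optimizer; a typical fit pipes the
 * guess into a complete ML fit. Assumes <x> is already sorted smallest
 * to largest, and assumes the optimizer entry point is named
 * <esl_hxp_FitComplete()>; check your Easel version for the exact name.
 */
#include "easel.h"
#include "esl_hyperexp.h"

static int
fit_hyperexp_sketch(double *x, int n)
{
  ESL_HYPEREXP *h = esl_hyperexp_Create(3);   /* 3 mixture components */
  int status;

  if (h == NULL) return eslEMEM;
  if ((status = esl_hxp_FitGuess(x, n, h))    != eslOK) goto DONE; /* crude start: q ~ 1/k, binned lambdas */
  if ((status = esl_hxp_FitComplete(x, n, h)) != eslOK) goto DONE; /* full ML optimization from that start  */

 DONE:
  esl_hyperexp_Destroy(h);
  return status;
}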
/* Function:  esl_vec_DLogNorm()
 * Synopsis:  Normalize a log p-vector, make it a p-vector.
 * Incept:    SRE, Thu Apr  7 17:45:39 2005 [St. Louis]
 *
 * Purpose:   Given an unnormalized log probability vector <vec>
 *            of length <n>, normalize it and make it a
 *            probability vector.
 *
 *            <esl_vec_FLogNorm()> does the same, but for a vector
 *            of floats instead of doubles.
 *
 * Returns:   (void); <vec> is changed in place.
 */
void
esl_vec_DLogNorm(double *vec, int n)
{
  double denom;

  denom = esl_vec_DLogSum(vec, n);
  esl_vec_DIncrement(vec, n, -1.*denom);
  esl_vec_DExp (vec, n);
  esl_vec_DNorm(vec, n);
}
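/* A minimal sketch (editor's addition) of the point of log-space
 * normalization: converting unnormalized log probabilities to a
 * p-vector without underflow. exp(-1001) underflows a double, but
 * <esl_vec_DLogSum()> computes log(sum(exp(vec))) stably, so
 * <esl_vec_DLogNorm()> still recovers the right p-vector.
 */
#include "easel.h"
#include "esl_vectorops.h"

static void
lognorm_sketch(void)
{
  double lp[3] = { -1001.0, -1002.0, -1003.0 };  /* naive exp() would give 0,0,0 */

  esl_vec_DLogNorm(lp, 3);  /* lp is now ~ { 0.665, 0.245, 0.090 } */
}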
/* Function:  esl_hyperexp_Read()
 *
 * Purpose:   Reads hyperexponential parameters from an open <e>,
 *            which is an <ESL_FILEPARSER> tokenizer for an open stream.
 *
 *            The first token is <K>, the number of mixture components.
 *            The second token is <mu>, the x offset shared by all components.
 *            Then for each mixture component <k=1..K>, it reads
 *            a mixture coefficient <q[k]> and a decay parameter
 *            <lambda[k]>.
 *
 *            The <2K+2> data tokens must occur in this order, but
 *            they can be grouped into any number of lines, because the
 *            parser ignores line breaks.
 *
 *            Anything after a <\#> character on a line is a comment, and
 *            is ignored.
 *
 * Returns:   <eslOK> on success, and <ret_hxp> points to a new <ESL_HYPEREXP>
 *            object.
 *            <eslEFORMAT> on "normal" parse failure caused by a bad file
 *            format that's likely the user's fault.
 *
 * Throws:    <eslEMEM> if allocation of the new <ESL_HYPEREXP> fails.
 *
 * FIXME: All our mixture models (esl_dirichlet, for example) should be
 *        reconciled w/ identical interfaces & behaviour.
 */
int
esl_hyperexp_Read(ESL_FILEPARSER *e, ESL_HYPEREXP **ret_hxp)
{
  ESL_HYPEREXP *hxp = NULL;
  char         *tok;
  int           status = eslOK;
  int           nc;
  int           k;
  double        sum;

  esl_fileparser_SetCommentChar(e, '#');

  if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
  nc = atoi(tok);
  if (nc < 1) {
    sprintf(e->errbuf, "Expected # of components K >= 1 as first token");
    goto ERROR;
  }

  if ((hxp = esl_hyperexp_Create(nc)) == NULL) return eslEMEM; /* percolation */

  if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
  hxp->mu = atof(tok);

  for (k = 0; k < hxp->K; k++)
    {
      if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
      hxp->q[k] = atof(tok);

      if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
      hxp->lambda[k] = atof(tok);

      if (hxp->q[k] < 0. || hxp->q[k] > 1.) {
        sprintf(e->errbuf, "Expected a mixture coefficient q[k], 0<=q[k]<=1");
        goto ERROR;
      }
      if (hxp->lambda[k] <= 0.) {
        sprintf(e->errbuf, "Expected a lambda parameter, lambda>0");
        goto ERROR;
      }
    }
  sum = esl_vec_DSum(hxp->q, hxp->K);
  if (fabs(sum-1.0) > 0.05) {
    sprintf(e->errbuf, "Expected mixture coefficients to sum to 1");
    goto ERROR;
  }
  esl_vec_DNorm(hxp->q, hxp->K);
  *ret_hxp = hxp;
  return eslOK;

 ERROR:
  esl_hyperexp_Destroy(hxp);
  return eslEFORMAT;
}
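/* A reading sketch (editor's addition, not part of the original
 * source). The parameter file contains 2K+2 whitespace-separated
 * tokens: K, mu, then K pairs of q[k] lambda[k]; '#' starts a comment.
 * For example:
 *
 *    2            # K, number of mixture components
 *    0.0          # mu, shared x offset
 *    0.8  0.5     # q[0], lambda[0]
 *    0.2  0.05    # q[1], lambda[1]
 *
 * The sketch wraps an open FILE with <esl_fileparser_Create()>; some
 * Easel versions also provide an <esl_fileparser_Open()> convenience.
 */
#include <stdio.h>
#include "easel.h"
#include "esl_fileparser.h"
#include "esl_hyperexp.h"

static int
read_hyperexp_sketch(const char *filename, ESL_HYPEREXP **ret_hxp)
{
  FILE           *fp = fopen(filename, "r");
  ESL_FILEPARSER *e  = NULL;
  int             status;

  if (fp == NULL) return eslENOTFOUND;
  if ((e = esl_fileparser_Create(fp)) == NULL) { fclose(fp); return eslEMEM; }

  status = esl_hyperexp_Read(e, ret_hxp);   /* eslOK, eslEFORMAT, or eslEMEM */

  esl_fileparser_Destroy(e);
  fclose(fp);
  return status;
}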
#include <stdio.h>
#include <stdlib.h>
#include "easel.h"
#include "esl_vectorops.h"

int
main(void)
{
  double *p;
  char    labels[] = "ACGT";
  int     n = 4;

  p = malloc(sizeof(double) * n);
  esl_vec_DSet(p, n, 1.0);
  esl_vec_DNorm(p, n);
  esl_vec_DDump(stdout, p, n, labels);

  free(p);
  return 0;
}
/* Function:  esl_msaweight_BLOSUM()
 * Synopsis:  BLOSUM weights.
 * Incept:    SRE, Sun Nov  5 09:52:41 2006 [Janelia]
 *
 * Purpose:   Given a multiple sequence alignment <msa> and an identity
 *            threshold <maxid>, calculate sequence weights using the
 *            BLOSUM algorithm (Henikoff and Henikoff, PNAS
 *            89:10915-10919, 1992). These weights are stored
 *            internally in the <msa> object, replacing any weights
 *            that may have already been there. Weights are $\geq 0$
 *            and they sum to <msa->nseq>.
 *
 *            The algorithm does a single linkage clustering by
 *            fractional id, defines clusters such that no two clusters
 *            have a pairwise link $\geq$ <maxid>, and assigns
 *            weights of $\frac{1}{M_i}$ to each of the $M_i$
 *            sequences in each cluster $i$. The <maxid> threshold
 *            is a fractional pairwise identity, in the range
 *            $0..1$.
 *
 *            The <msa> may be in either digitized or text mode.
 *            Digital mode is preferred, so that the pairwise identity
 *            calculations deal with degenerate residue symbols
 *            properly.
 *
 * Returns:   <eslOK> on success, and the weights inside <msa> have been
 *            modified.
 *
 * Throws:    <eslEMEM> on allocation error. <eslEINVAL> if a pairwise
 *            identity calculation fails because of corrupted sequence
 *            data. In either case, the <msa> is unmodified.
 *
 * Xref:      [Henikoff92]; squid::weight.c::BlosumWeights().
 */
int
esl_msaweight_BLOSUM(ESL_MSA *msa, double maxid)
{
  int  *c    = NULL; /* cluster assignments for each sequence */
  int  *nmem = NULL; /* number of seqs in each cluster        */
  int   nc;          /* number of clusters                    */
  int   i;           /* loop counter                          */
  int   status;

  /* Contract checks
   */
  ESL_DASSERT1( (maxid >= 0. && maxid <= 1.) );
  ESL_DASSERT1( (msa->nseq >= 1) );
  ESL_DASSERT1( (msa->alen >= 1) );
  if (msa->nseq == 1) { msa->wgt[0] = 1.0; return eslOK; }

  if ((status = esl_msacluster_SingleLinkage(msa, maxid, &c, NULL, &nc)) != eslOK) goto ERROR;
  ESL_ALLOC(nmem, sizeof(int) * nc);
  esl_vec_ISet(nmem, nc, 0);
  for (i = 0; i < msa->nseq; i++) nmem[c[i]]++;
  for (i = 0; i < msa->nseq; i++) msa->wgt[i] = 1. / (double) nmem[c[i]];

  /* Make weights normalize up to nseq, and return.
   */
  esl_vec_DNorm(msa->wgt, msa->nseq);
  esl_vec_DScale(msa->wgt, msa->nseq, (double) msa->nseq);
  msa->flags |= eslMSA_HASWGTS;

  free(nmem);
  free(c);
  return eslOK;

 ERROR:
  if (c    != NULL) free(c);
  if (nmem != NULL) free(nmem);
  return status;
}
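/* Usage sketch (editor's addition): BLOSUM weighting at the 0.62
 * fractional identity threshold, the clustering level used to build
 * the BLOSUM62 matrix. Assumes <msa> was already read in (digital mode
 * preferred, per the docs above).
 */
#include "easel.h"
#include "esl_msa.h"
#include "esl_msaweight.h"

static int
blosum_weight_sketch(ESL_MSA *msa)
{
  int status;

  if ((status = esl_msaweight_BLOSUM(msa, 0.62)) != eslOK) return status;
  /* msa->wgt[] now holds weights >= 0 that sum to msa->nseq */
  return eslOK;
}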
static void
utest_pvectors(void)
{
  char   *msg   = "pvector unit test failed";
  double  p1[4] = { 0.25, 0.25, 0.25, 0.25 };
  double  p2[4];
  double  p3[4];
  float   p1f[4];
  float   p2f[4] = { 0.0, 0.5, 0.5, 0.0 };
  float   p3f[4];
  int     n = 4;
  double  result;

  esl_vec_D2F(p1,  n, p1f);
  esl_vec_F2D(p2f, n, p2);

  if (esl_vec_DValidate(p1,  n, 1e-12, NULL) != eslOK) esl_fatal(msg);
  if (esl_vec_FValidate(p1f, n, 1e-7,  NULL) != eslOK) esl_fatal(msg);

  result = esl_vec_DEntropy(p1,  n);         if (esl_DCompare(2.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_FEntropy(p1f, n);         if (esl_DCompare(2.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_DEntropy(p2,  n);         if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_FEntropy(p2f, n);         if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);

  result = esl_vec_DRelEntropy(p2,  p1,  n); if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_FRelEntropy(p2f, p1f, n); if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);

  result = esl_vec_DRelEntropy(p1,  p2,  n); if (result != eslINFINITY) esl_fatal(msg);
  result = esl_vec_FRelEntropy(p1f, p2f, n); if (result != eslINFINITY) esl_fatal(msg);

  esl_vec_DLog(p2, n);
  if (esl_vec_DLogValidate(p2, n, 1e-12, NULL) != eslOK) esl_fatal(msg);
  esl_vec_DExp(p2, n);
  if (p2[0] != 0.) esl_fatal(msg);

  esl_vec_FLog(p2f, n);
  if (esl_vec_FLogValidate(p2f, n, 1e-7, NULL) != eslOK) esl_fatal(msg);
  esl_vec_FExp(p2f, n);
  if (p2f[0] != 0.) esl_fatal(msg);

  esl_vec_DCopy(p2, n, p3);
  esl_vec_DScale(p3, n, 10.);
  esl_vec_DNorm(p3, n);
  if (esl_vec_DCompare(p2, p3, n, 1e-12) != eslOK) esl_fatal(msg);

  esl_vec_DLog(p3, n);
  result = esl_vec_DLogSum(p3, n);
  if (esl_DCompare(0.0, result, 1e-12) != eslOK) esl_fatal(msg);
  esl_vec_DIncrement(p3, n, 2.0);
  esl_vec_DLogNorm(p3, n);
  if (esl_vec_DCompare(p2, p3, n, 1e-12) != eslOK) esl_fatal(msg);

  esl_vec_FCopy(p2f, n, p3f);
  esl_vec_FScale(p3f, n, 10.);
  esl_vec_FNorm(p3f, n);
  if (esl_vec_FCompare(p2f, p3f, n, 1e-7) != eslOK) esl_fatal(msg);

  esl_vec_FLog(p3f, n);
  result = esl_vec_FLogSum(p3f, n);
  if (esl_DCompare(0.0, result, 1e-7) != eslOK) esl_fatal(msg);
  esl_vec_FIncrement(p3f, n, 2.0);
  esl_vec_FLogNorm(p3f, n);
  if (esl_vec_FCompare(p2f, p3f, n, 1e-7) != eslOK) esl_fatal(msg);
  return;
}
/* Function:  esl_msaweight_GSC()
 * Synopsis:  GSC weights.
 * Incept:    SRE, Fri Nov  3 13:31:14 2006 [Janelia]
 *
 * Purpose:   Given a multiple sequence alignment <msa>, calculate
 *            sequence weights according to the
 *            Gerstein/Sonnhammer/Chothia algorithm. These weights
 *            are stored internally in the <msa> object, replacing
 *            any weights that may have already been there. Weights
 *            are $\geq 0$ and they sum to <msa->nseq>.
 *
 *            The <msa> may be in either digitized or text mode.
 *            Digital mode is preferred, so that distance calculations
 *            used by the GSC algorithm are robust against degenerate
 *            residue symbols.
 *
 *            This is an implementation of Gerstein et al., "A method to
 *            weight protein sequences to correct for unequal
 *            representation", JMB 236:1067-1078, 1994.
 *
 *            The algorithm is $O(N^2)$ memory (it requires a pairwise
 *            distance matrix) and $O(N^3 + LN^2)$ time ($N^3$ for a UPGMA
 *            tree building step, $LN^2$ for distance matrix construction)
 *            for an alignment of N sequences and L columns.
 *
 *            In the current implementation, the actual memory
 *            requirement is dominated by two full NxN distance
 *            matrices (one tmp copy in UPGMA, and one here): for
 *            8-byte doubles, that's $16N^2$ bytes. To keep the
 *            calculation under memory limits, don't process large
 *            alignments: max 1400 sequences for 32 MB, max 4000
 *            sequences for 256 MB, max 8000 seqs for 1 GB. Watch
 *            out, because Pfam alignments can easily blow this up.
 *
 * Note:      Memory usage could be improved. UPGMA consumes a distance
 *            matrix, but that can be D itself, not a copy, if the
 *            caller doesn't mind the destruction of D. Also, D is
 *            symmetrical, so we could use upper or lower triangular
 *            matrices if we rewrote dmatrix to allow them.
 *
 *            I also think UPGMA can be reduced to O(N^2) time, by
 *            being more tricky about rapidly identifying the minimum
 *            element: could keep min of each row, and update that,
 *            I think.
 *
 * Returns:   <eslOK> on success, and the weights inside <msa> have been
 *            modified.
 *
 * Throws:    <eslEINVAL> if the alignment data are somehow invalid and
 *            distance matrices can't be calculated. <eslEMEM> on an
 *            allocation error. In either case, the original <msa> is
 *            left unmodified.
 *
 * Xref:      [Gerstein94]; squid::weight.c::GSCWeights(); STL11/81.
 */
int
esl_msaweight_GSC(ESL_MSA *msa)
{
  ESL_DMATRIX *D = NULL;     /* distance matrix */
  ESL_TREE    *T = NULL;     /* UPGMA tree */
  double      *x = NULL;     /* storage per node, 0..N-2 */
  double       lw, rw;       /* total branchlen on left, right subtrees */
  double       lx, rx;       /* distribution of weight to left, right side */
  int          i;            /* counter over nodes */
  int          status;

  /* Contract checks
   */
  ESL_DASSERT1( (msa       != NULL) );
  ESL_DASSERT1( (msa->nseq >= 1)    );
  ESL_DASSERT1( (msa->alen >= 1)    );
  ESL_DASSERT1( (msa->wgt  != NULL) );
  if (msa->nseq == 1) { msa->wgt[0] = 1.0; return eslOK; }

  /* GSC weights use a rooted tree with "branch lengths" calculated by
   * UPGMA on a fractional difference matrix - pretty crude.
   */
  if (! (msa->flags & eslMSA_DIGITAL)) {
    if ((status = esl_dst_CDiffMx(msa->aseq, msa->nseq, &D)) != eslOK) goto ERROR;
  }
#ifdef eslAUGMENT_ALPHABET
  else {
    if ((status = esl_dst_XDiffMx(msa->abc, msa->ax, msa->nseq, &D)) != eslOK) goto ERROR;
  }
#endif

  /* oi, look out here. UPGMA is correct, but old squid library uses
   * single linkage, so for regression tests ONLY, we use single link.
   */
#ifdef eslMSAWEIGHT_REGRESSION
  if ((status = esl_tree_SingleLinkage(D, &T)) != eslOK) goto ERROR;
#else
  if ((status = esl_tree_UPGMA(D, &T)) != eslOK) goto ERROR;
#endif
  esl_tree_SetCladesizes(T);

  ESL_ALLOC(x, sizeof(double) * (T->N-1));

  /* Postorder traverse (leaves to root) to calculate the total branch
   * length under each internal node; store this in x[]. Remember the
   * total branch length (x[0]) for a future sanity check.
   */
  for (i = T->N-2; i >= 0; i--)
    {
      x[i] = T->ld[i] + T->rd[i];
      if (T->left[i]  > 0) x[i] += x[T->left[i]];
      if (T->right[i] > 0) x[i] += x[T->right[i]];
    }

  /* Preorder traverse (root to leaves) to calculate the weights. Now
   * we use x[] to mean, the total weight *above* this node that we will
   * apportion to the node's left and right children. The two
   * meanings of x[] never cross: every x[] beneath x[i] is still a
   * total branch length.
   *
   * Because the API guarantees that msa is returned unmodified in case
   * of an exception, and we're touching msa->wgt here, no exceptions
   * may be thrown from now on in this function.
   */
  x[0] = 0;  /* initialize: no branch to the root. */
  for (i = 0; i <= T->N-2; i++)
    {
      lw = T->ld[i];   if (T->left[i]  > 0) lw += x[T->left[i]];
      rw = T->rd[i];   if (T->right[i] > 0) rw += x[T->right[i]];

      if (lw+rw == 0.)
        {
          /* A special case arises in GSC weights when all branch lengths in a subtree are 0.
           * In this case, all seqs in this clade should get equal weights, sharing x[i] equally.
           * So, split x[i] in proportion to cladesize, not to branch weight.
           */
          if (T->left[i]  > 0) lx = x[i] * ((double) T->cladesize[T->left[i]]  / (double) T->cladesize[i]);
          else                 lx = x[i] / (double) T->cladesize[i];

          if (T->right[i] > 0) rx = x[i] * ((double) T->cladesize[T->right[i]] / (double) T->cladesize[i]);
          else                 rx = x[i] / (double) T->cladesize[i];
        }
      else /* normal case: x[i] split in proportion to branch weight. */
        {
          lx = x[i] * lw/(lw+rw);
          rx = x[i] * rw/(lw+rw);
        }

      if (T->left[i]  <= 0) msa->wgt[-(T->left[i])]  = lx + T->ld[i];
      else                  x[T->left[i]]            = lx + T->ld[i];

      if (T->right[i] <= 0) msa->wgt[-(T->right[i])] = rx + T->rd[i];
      else                  x[T->right[i]]           = rx + T->rd[i];
    }

  /* Renormalize weights to sum to N.
   */
  esl_vec_DNorm(msa->wgt, msa->nseq);
  esl_vec_DScale(msa->wgt, msa->nseq, (double) msa->nseq);
  msa->flags |= eslMSA_HASWGTS;

  free(x);
  esl_tree_Destroy(T);
  esl_dmatrix_Destroy(D);
  return eslOK;

 ERROR:
  if (x != NULL) free(x);
  if (T != NULL) esl_tree_Destroy(T);
  if (D != NULL) esl_dmatrix_Destroy(D);
  return status;
}
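/* Usage sketch (editor's addition): because GSC needs two NxN double
 * matrices (~16 N^2 bytes, per the notes above), guard deep alignments
 * and fall back to the O(1)-memory PB weights described below. The
 * 4000-seq cutoff (~256 MB) is illustrative, not an Easel constant.
 */
#include "easel.h"
#include "esl_msa.h"
#include "esl_msaweight.h"

static int
weight_msa_sketch(ESL_MSA *msa)
{
  if (msa->nseq <= 4000) return esl_msaweight_GSC(msa);  /* tree-based, more principled  */
  else                   return esl_msaweight_PB(msa);   /* O(NL) time, O(1) memory      */
}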
/* Function:  esl_msaweight_PB()
 * Synopsis:  PB (position-based) weights.
 * Incept:    SRE, Sun Nov  5 08:59:28 2006 [Janelia]
 *
 * Purpose:   Given a multiple alignment <msa>, calculate sequence
 *            weights according to the position-based weighting
 *            algorithm (Henikoff and Henikoff, JMB 243:574-578,
 *            1994). These weights are stored internally in the <msa>
 *            object, replacing any weights that may have already been
 *            there. Weights are $\geq 0$ and they sum to <msa->nseq>.
 *
 *            The <msa> may be in either digitized or text mode.
 *            Digital mode is preferred, so that the algorithm
 *            deals with degenerate residue symbols properly.
 *
 *            The Henikoffs' algorithm does not give rules for dealing
 *            with gaps or degenerate residue symbols. The rule here
 *            is to ignore them. This means that longer sequences
 *            initially get more weight; hence a "double
 *            normalization" in which the weights are first divided by
 *            sequence length in canonical residues (to compensate for
 *            that effect), then normalized to sum to nseq.
 *
 *            An advantage of the PB method is efficiency.
 *            It is $O(1)$ in memory and $O(NL)$ time, for an alignment of
 *            N sequences and L columns. This makes it a good method
 *            for ad hoc weighting of very deep alignments.
 *
 *            When the alignment is in simple text mode, IUPAC
 *            degenerate symbols are not dealt with correctly; instead,
 *            the algorithm simply uses the 26 letters as "residues"
 *            (case-insensitively), and treats all other residues as
 *            gaps.
 *
 * Returns:   <eslOK> on success, and the weights inside <msa> have been
 *            modified.
 *
 * Throws:    <eslEMEM> on allocation error, in which case <msa> is
 *            returned unmodified.
 *
 * Xref:      [Henikoff94b]; squid::weight.c::PositionBasedWeights().
 */
int
esl_msaweight_PB(ESL_MSA *msa)
{
  int    *nres = NULL;   /* counts of each residue observed in a column      */
  int     ntotal;        /* number of different symbols observed in a column */
  int     rlen;          /* number of residues in a sequence                 */
  int     idx, pos, i;
  int     K;             /* alphabet size */
  int     status;

  /* Contract checks
   */
  ESL_DASSERT1( (msa->nseq >= 1) );
  ESL_DASSERT1( (msa->alen >= 1) );
  if (msa->nseq == 1) { msa->wgt[0] = 1.0; return eslOK; }

  /* Initialize
   */
  if (! (msa->flags & eslMSA_DIGITAL))
    {
      ESL_ALLOC(nres, sizeof(int) * 26);
      K = 26;
    }
#ifdef eslAUGMENT_ALPHABET
  else
    {
      ESL_ALLOC(nres, sizeof(int) * msa->abc->K);
      K = msa->abc->K;
    }
#endif
  esl_vec_DSet(msa->wgt, msa->nseq, 0.);

  /* This section handles text alignments */
  if (! (msa->flags & eslMSA_DIGITAL))
    {
      for (pos = 0; pos < msa->alen; pos++)
        {
          /* Collect # of letters A..Z in this column, and total */
          esl_vec_ISet(nres, K, 0);
          for (idx = 0; idx < msa->nseq; idx++)
            if (isalpha((int) msa->aseq[idx][pos]))
              nres[toupper((int) msa->aseq[idx][pos]) - 'A'] ++;
          for (ntotal = 0, i = 0; i < K; i++) if (nres[i] > 0) ntotal++;

          /* Bump weight on each seq by PB rule */
          if (ntotal > 0) {
            for (idx = 0; idx < msa->nseq; idx++) {
              if (isalpha((int) msa->aseq[idx][pos]))
                msa->wgt[idx] += 1. /
                  (double) (ntotal * nres[toupper((int) msa->aseq[idx][pos]) - 'A'] );
            }
          }
        }

      /* first normalization by # of residues counted in each seq */
      for (idx = 0; idx < msa->nseq; idx++)
        {
          for (rlen = 0, pos = 0; pos < msa->alen; pos++)
            if (isalpha((int) msa->aseq[idx][pos])) rlen++;
          if (rlen > 0) msa->wgt[idx] /= (double) rlen;
          /* if rlen == 0 for this seq, its weight is still 0.0, as initialized. */
        }
    }

  /* This section handles digital alignments.
   */
#ifdef eslAUGMENT_ALPHABET
  else
    {
      for (pos = 1; pos <= msa->alen; pos++)
        {
          /* Collect # of residues 0..K-1 in this column, and total # */
          esl_vec_ISet(nres, K, 0);
          for (idx = 0; idx < msa->nseq; idx++)
            if (esl_abc_XIsCanonical(msa->abc, msa->ax[idx][pos]))
              nres[(int) msa->ax[idx][pos]] ++;
          for (ntotal = 0, i = 0; i < K; i++) if (nres[i] > 0) ntotal++;

          /* Bump weight on each sequence by PB rule */
          if (ntotal > 0) {
            for (idx = 0; idx < msa->nseq; idx++) {
              if (esl_abc_XIsCanonical(msa->abc, msa->ax[idx][pos]))
                msa->wgt[idx] += 1. / (double) (ntotal * nres[msa->ax[idx][pos]]);
            }
          }
        }

      /* first normalization by # of residues counted in each seq */
      for (idx = 0; idx < msa->nseq; idx++)
        {
          for (rlen = 0, pos = 1; pos <= msa->alen; pos++)
            if (esl_abc_XIsCanonical(msa->abc, msa->ax[idx][pos])) rlen++;
          if (rlen > 0) msa->wgt[idx] /= (double) rlen;
          /* if rlen == 0 for this seq, its weight is still 0.0, as initialized. */
        }
    }
#endif

  /* Make weights normalize up to nseq, and return. In pathological
   * case where all wgts were 0 (no seqs contain any unambiguous
   * residues), weights become 1.0.
   */
  esl_vec_DNorm(msa->wgt, msa->nseq);
  esl_vec_DScale(msa->wgt, msa->nseq, (double) msa->nseq);

  msa->flags |= eslMSA_HASWGTS;
  free(nres);
  return eslOK;

 ERROR:
  if (nres != NULL) free(nres);
  return status;
}
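/* Worked example (editor's addition) of the per-column PB rule used
 * above: each residue gets 1/(ntotal * nres[residue]), so rarer
 * residues in a column earn their sequences more weight. This
 * standalone demo needs no Easel types.
 */
#include <stdio.h>

static void
pb_column_demo(void)
{
  char col[4]   = { 'A', 'A', 'C', 'A' };  /* one column of a 4-seq alignment */
  int  nres[26] = { 0 };
  int  ntotal   = 0;
  int  i;

  for (i = 0; i < 4;  i++) nres[col[i]-'A']++;            /* counts: A=3, C=1    */
  for (i = 0; i < 26; i++) if (nres[i] > 0) ntotal++;     /* 2 distinct residues */

  for (i = 0; i < 4; i++)
    printf("seq %d gets += %.3f\n", i, 1. / (double) (ntotal * nres[col[i]-'A']));
  /* prints 0.167 0.167 0.500 0.167: the lone C gets 3x the weight of each A */
}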
/* dump_infocontent_info
 *
 * Given an MSA with RF annotation, dump information content per column data to
 * an open output file.
 */
static int
dump_infocontent_info(FILE *fp, ESL_ALPHABET *abc, double **abc_ct, int use_weights, int nali, int64_t alen, int nseq, int *i_am_rf, char *msa_name, char *alifile, char *errbuf)
{
  int     status;
  int     apos, rfpos;
  double  bg_ent;
  double *bg = NULL;
  double *abc_freq = NULL;
  double  nnongap;

  ESL_ALLOC(bg, sizeof(double) * abc->K);
  esl_vec_DSet(bg, abc->K, 1./(abc->K));
  bg_ent = esl_vec_DEntropy(bg, abc->K);
  free(bg);

  ESL_ALLOC(abc_freq, sizeof(double) * abc->K);

  fprintf(fp, "# Information content per column (bits):\n");
  fprintf(fp, "# Alignment file: %s\n", alifile);
  fprintf(fp, "# Alignment idx: %d\n", nali);
  if(msa_name != NULL) { fprintf(fp, "# Alignment name: %s\n", msa_name); }
  fprintf(fp, "# Number of sequences: %d\n", nseq);
  if(use_weights) { fprintf(fp, "# IMPORTANT: Counts are weighted based on sequence weights in alignment file.\n"); }
  else            { fprintf(fp, "# Sequence weights from alignment were ignored (if they existed).\n"); }
  fprintf(fp, "#\n");

  if(i_am_rf != NULL) {
    fprintf(fp, "# %7s %7s %10s %10s\n", "rfpos",   "alnpos",  "freqnongap", "info(bits)");
    fprintf(fp, "# %7s %7s %10s %10s\n", "-------", "-------", "----------", "----------");
  }
  else {
    fprintf(fp, "# %7s %10s %10s\n", "alnpos",  "freqnongap", "info(bits)");
    fprintf(fp, "# %7s %10s %10s\n", "-------", "----------", "----------");
  }

  rfpos = 0;
  for(apos = 0; apos < alen; apos++) {
    if(i_am_rf != NULL) {
      if(i_am_rf[apos]) {
        fprintf(fp, " %7d", rfpos+1);
        rfpos++;
      }
      else {
        fprintf(fp, " %7s", "-");
      }
    }
    nnongap = esl_vec_DSum(abc_ct[apos], abc->K);
    esl_vec_DCopy(abc_ct[apos], abc->K, abc_freq);
    esl_vec_DNorm(abc_freq, abc->K);
    fprintf(fp, " %7d %10.8f %10.8f\n", apos+1,
            nnongap / (nnongap + abc_ct[apos][abc->K]),
            (bg_ent - esl_vec_DEntropy(abc_freq, abc->K)));
  }
  fprintf(fp, "//\n");

  if(abc_freq != NULL) free(abc_freq);
  return eslOK;

 ERROR:
  ESL_FAIL(eslEMEM, errbuf, "out of memory");
  return status; /* NEVERREACHED */
}
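/* A minimal sketch (editor's addition) of the information content
 * calculation used above: info = H(background) - H(column), in bits.
 * For DNA (K=4) with a uniform background, H(bg) = 2 bits; a column
 * with counts A:3, C:1 has H ~ 0.811 bits, giving ~1.189 bits of info.
 */
#include "easel.h"
#include "esl_vectorops.h"

static double
column_info_sketch(void)
{
  double bg[4] = { 0.25, 0.25, 0.25, 0.25 };  /* uniform background */
  double ct[4] = { 3., 1., 0., 0. };          /* column counts ACGT */

  esl_vec_DNorm(ct, 4);                       /* counts -> frequencies */
  return esl_vec_DEntropy(bg, 4) - esl_vec_DEntropy(ct, 4);  /* ~1.189 */
}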