/* Function:  esl_vec_DLogValidate()
 * Synopsis:  Verify that vector is a log-p-vector.
 * Incept:    ER, Tue Dec  5 09:46:51 EST 2006 [janelia]
 *
 * Purpose:   Validate a log probability vector <vec> of length <n>.
 *            The intended contract is that exp() of each element lies
 *            between 0 and 1, and that the exponentiated elements sum
 *            to 1. The check is carried out by exponentiating a
 *            temporary copy of <vec> and handing that copy to
 *            <esl_vec_DValidate()> together with <tol>; <vec> itself
 *            is only read. A zero-length vector is accepted as valid
 *            without any further work.
 *
 * Args:      vec    - log p vector to validate.
 *            n      - dimensionality of vec
 *            tol    - tolerance handed to esl_vec_DValidate()
 *            errbuf - NULL, or a caller-allocated failure message
 *                     buffer (the original note asks for at least
 *                     p7_ERRBUFSIZE chars). It is reset to an empty
 *                     string on entry.
 *
 * Returns:   <eslOK> on success. If esl_vec_DValidate() rejects the
 *            exponentiated vector, its status is returned unchanged
 *            and <errbuf> (if provided) was passed to it so it can
 *            leave a message there.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
esl_vec_DLogValidate(double *vec, int n, double tol, char *errbuf)
{
  double *pvec = NULL;		/* exp(vec): working copy in probability space */
  int     status;

  if (errbuf != NULL) errbuf[0] = '\0';
  if (n == 0) return eslOK;

  ESL_ALLOC(pvec, sizeof(double)*n);
  esl_vec_DCopy(vec, n, pvec);
  esl_vec_DExp(pvec, n);

  status = esl_vec_DValidate(pvec, n, tol, errbuf);
  if (status != eslOK) goto ERROR;

  free(pvec);
  return eslOK;

 ERROR:
  free(pvec);			/* free(NULL) is a no-op, so no guard is needed */
  return status;
}
/* Function: p7_prior_CreateAmino() * Incept: SRE, Sat Mar 24 09:35:36 2007 [Janelia] * * Purpose: Creates the default mixture Dirichlet prior for protein * sequences. * * The transition priors (match, insert, delete) are all * single Dirichlets, originally trained by Graeme * Mitchison in the mid-1990's. Notes have been lost, but * we believe they were trained on an early version of * Pfam. * * The match emission prior is a nine-component mixture * from Kimmen Sjolander, who trained it on the Blocks9 * database \citep{Sjolander96}. * * The insert emission prior is a single Dirichlet with * high $|\alpha|$, such that insert emission probabilities * are essentially fixed by the prior, regardless of * observed count data. The slightly polar parameterization * was obtained by training on Pfam 1.0. * * Returns: a pointer to the new <P7_PRIOR> structure. */ P7_PRIOR * p7_prior_CreateAmino(void) { int status; P7_PRIOR *pri = NULL; int q; /* default match mixture coefficients: [Sjolander96] */ static double defmq[9] = { 0.178091, 0.056591, 0.0960191, 0.0781233, 0.0834977, 0.0904123, 0.114468, 0.0682132, 0.234585 }; /* default match mixture Dirichlet components [Sjolander96] */ static double defm[9][20] = { { 0.270671, 0.039848, 0.017576, 0.016415, 0.014268, 0.131916, 0.012391, 0.022599, 0.020358, 0.030727, 0.015315, 0.048298, 0.053803, 0.020662, 0.023612, 0.216147, 0.147226, 0.065438, 0.003758, 0.009621 }, { 0.021465, 0.010300, 0.011741, 0.010883, 0.385651, 0.016416, 0.076196, 0.035329, 0.013921, 0.093517, 0.022034, 0.028593, 0.013086, 0.023011, 0.018866, 0.029156, 0.018153, 0.036100, 0.071770, 0.419641 }, { 0.561459, 0.045448, 0.438366, 0.764167, 0.087364, 0.259114, 0.214940, 0.145928, 0.762204, 0.247320, 0.118662, 0.441564, 0.174822, 0.530840, 0.465529, 0.583402, 0.445586, 0.227050, 0.029510, 0.121090 }, { 0.070143, 0.011140, 0.019479, 0.094657, 0.013162, 0.048038, 0.077000, 0.032939, 0.576639, 0.072293, 0.028240, 0.080372, 0.037661, 0.185037, 0.506783, 0.073732, 
0.071587, 0.042532, 0.011254, 0.028723 }, { 0.041103, 0.014794, 0.005610, 0.010216, 0.153602, 0.007797, 0.007175, 0.299635, 0.010849, 0.999446, 0.210189, 0.006127, 0.013021, 0.019798, 0.014509, 0.012049, 0.035799, 0.180085, 0.012744, 0.026466 }, { 0.115607, 0.037381, 0.012414, 0.018179, 0.051778, 0.017255, 0.004911, 0.796882, 0.017074, 0.285858, 0.075811, 0.014548, 0.015092, 0.011382, 0.012696, 0.027535, 0.088333, 0.944340, 0.004373, 0.016741 }, { 0.093461, 0.004737, 0.387252, 0.347841, 0.010822, 0.105877, 0.049776, 0.014963, 0.094276, 0.027761, 0.010040, 0.187869, 0.050018, 0.110039, 0.038668, 0.119471, 0.065802, 0.025430, 0.003215, 0.018742 }, { 0.452171, 0.114613, 0.062460, 0.115702, 0.284246, 0.140204, 0.100358, 0.550230, 0.143995, 0.700649, 0.276580, 0.118569, 0.097470, 0.126673, 0.143634, 0.278983, 0.358482, 0.661750, 0.061533, 0.199373 }, { 0.005193, 0.004039, 0.006722, 0.006121, 0.003468, 0.016931, 0.003647, 0.002184, 0.005019, 0.005990, 0.001473, 0.004158, 0.009055, 0.003630, 0.006583, 0.003172, 0.003690, 0.002967, 0.002772, 0.002686 }, }; ESL_ALLOC(pri, sizeof(P7_PRIOR)); pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL; pri->tm = esl_mixdchlet_Create(1, 3); /* single component; 3 params */ pri->ti = esl_mixdchlet_Create(1, 2); /* single component; 2 params */ pri->td = esl_mixdchlet_Create(1, 2); /* single component; 2 params */ pri->em = esl_mixdchlet_Create(9, 20); /* 9 component; 20 params */ pri->ei = esl_mixdchlet_Create(1, 20); /* single component; 20 params */ if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR; /* Transition priors: originally from Graeme Mitchison. Notes are lost, but we believe * they were trained on an early version of Pfam. */ pri->tm->pq[0] = 1.0; pri->tm->alpha[0][0] = 0.7939; /* TMM */ pri->tm->alpha[0][1] = 0.0278; /* TMI */ /* Markus suggests ~10x MD, ~0.036; test! */ pri->tm->alpha[0][2] = 0.0135; /* TMD */ /* Markus suggests 0.1x MI, ~0.004; test! 
*/ pri->ti->pq[0] = 1.0; pri->ti->alpha[0][0] = 0.1551; /* TIM */ pri->ti->alpha[0][1] = 0.1331; /* TII */ pri->td->pq[0] = 1.0; pri->td->alpha[0][0] = 0.9002; /* TDM */ pri->td->alpha[0][1] = 0.5630; /* TDD */ /* Match emission priors are from Kimmen Sjolander, trained * on the Blocks9 database. [Sjolander96] */ for (q = 0; q < 9; q++) { pri->em->pq[q] = defmq[q]; esl_vec_DCopy(defm[q], 20, pri->em->alpha[q]); } /* Insert emission priors were trained on Pfam 1.0, 10 Nov 1996; * see ~/projects/plan7/InsertStatistics. * Inserts are slightly biased towards polar residues and away from * hydrophobic residues. */ pri->ei->pq[0] = 1.0; pri->ei->alpha[0][0] = 681.; /* A */ pri->ei->alpha[0][1] = 120.; /* C */ pri->ei->alpha[0][2] = 623.; /* D */ pri->ei->alpha[0][3] = 651.; /* E */ pri->ei->alpha[0][4] = 313.; /* F */ pri->ei->alpha[0][5] = 902.; /* G */ pri->ei->alpha[0][6] = 241.; /* H */ pri->ei->alpha[0][7] = 371.; /* I */ pri->ei->alpha[0][8] = 687.; /* K */ pri->ei->alpha[0][9] = 676.; /* L */ pri->ei->alpha[0][10] = 143.; /* M */ pri->ei->alpha[0][11] = 548.; /* N */ pri->ei->alpha[0][12] = 647.; /* P */ pri->ei->alpha[0][13] = 415.; /* Q */ pri->ei->alpha[0][14] = 551.; /* R */ pri->ei->alpha[0][15] = 926.; /* S */ pri->ei->alpha[0][16] = 623.; /* T */ pri->ei->alpha[0][17] = 505.; /* V */ pri->ei->alpha[0][18] = 102.; /* W */ pri->ei->alpha[0][19] = 269.; /* Y */ return pri; ERROR: if (pri != NULL) p7_prior_Destroy(pri); return NULL; }
/* Function: p7_prior_CreateNucleic() * * Purpose: Creates the default mixture Dirichlet prior for nucleotide * sequences. * * The transition priors (match, insert, delete) are all * single Dirichlets, trained on a portion of the rmark dataset * * The match emission prior is an eight-component mixture * trained against a portion of the rmark dataset * * The insert emission prior is a single Dirichlet with * high $|\alpha|$, such that insert emission probabilities * are essentially fixed by the prior, regardless of * observed count data. * * Returns: a pointer to the new <P7_PRIOR> structure. */ P7_PRIOR * p7_prior_CreateNucleic(void) { int status; P7_PRIOR *pri = NULL; int q; /* Plus-1 Laplace prior int num_comp = 1; static double defmq[2] = { 1.0 }; static double defm[1][4] = { { 1.0, 1.0, 1.0, 1.0} // }; */ /* Match emission priors are trained on Rmark3 database * Xref: ~wheelert/notebook/2011/0325_nhmmer_new_parameters */ int num_comp = 4; static double defmq[4] = { 0.24, 0.26, 0.08, 0.42 }; static double defm[4][4] = { { 0.16, 0.45, 0.12, 0.39}, { 0.09, 0.03, 0.09, 0.04}, { 1.29, 0.40, 6.58, 0.51}, { 1.74, 1.49, 1.57, 1.95} }; ESL_ALLOC(pri, sizeof(P7_PRIOR)); pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL; pri->tm = esl_mixdchlet_Create(1, 3); // match transitions; single component; 3 params pri->ti = esl_mixdchlet_Create(1, 2); // insert transitions; single component; 2 params pri->td = esl_mixdchlet_Create(1, 2); // delete transitions; single component; 2 params pri->em = esl_mixdchlet_Create(num_comp, 4); // match emissions; X component; 4 params pri->ei = esl_mixdchlet_Create(1, 4); // insert emissions; single component; 4 params if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR; /* Transition priors: roughly, learned from rmark benchmark - hand-beautified (trimming overspecified significant digits) */ pri->tm->pq[0] = 1.0; pri->tm->alpha[0][0] = 2.0; // TMM pri->tm->alpha[0][1] = 0.1; // 
TMI pri->tm->alpha[0][2] = 0.1; // TMD pri->ti->pq[0] = 1.0; pri->ti->alpha[0][0] = 0.06; // TIM pri->ti->alpha[0][1] = 0.2; // TII pri->td->pq[0] = 1.0; pri->td->alpha[0][0] = 0.1; // TDM pri->td->alpha[0][1] = 0.2; // TDD /* Match emission priors */ for (q = 0; q < num_comp; q++) { pri->em->pq[q] = defmq[q]; esl_vec_DCopy(defm[q], 4, pri->em->alpha[q]); } /* Insert emission priors. Should that alphas be lower? higher? */ pri->ei->pq[0] = 1.0; esl_vec_DSet(pri->ei->alpha[0], 4, 1.0); return pri; ERROR: if (pri != NULL) p7_prior_Destroy(pri); return NULL; }
static void utest_pvectors(void) { char *msg = "pvector unit test failed"; double p1[4] = { 0.25, 0.25, 0.25, 0.25 }; double p2[4]; double p3[4]; float p1f[4]; float p2f[4] = { 0.0, 0.5, 0.5, 0.0 }; float p3f[4]; int n = 4; double result; esl_vec_D2F(p1, n, p1f); esl_vec_F2D(p2f, n, p2); if (esl_vec_DValidate(p1, n, 1e-12, NULL) != eslOK) esl_fatal(msg); if (esl_vec_FValidate(p1f, n, 1e-7, NULL) != eslOK) esl_fatal(msg); result = esl_vec_DEntropy(p1, n); if (esl_DCompare(2.0, result, 1e-9) != eslOK) esl_fatal(msg); result = esl_vec_FEntropy(p1f, n); if (esl_DCompare(2.0, result, 1e-9) != eslOK) esl_fatal(msg); result = esl_vec_DEntropy(p2, n); if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg); result = esl_vec_FEntropy(p2f, n); if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg); result = esl_vec_DRelEntropy(p2, p1, n); if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg); result = esl_vec_FRelEntropy(p2f, p1f, n); if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg); result = esl_vec_DRelEntropy(p1, p2, n); if (result != eslINFINITY) esl_fatal(msg); result = esl_vec_FRelEntropy(p1f, p2f, n); if (result != eslINFINITY) esl_fatal(msg); esl_vec_DLog(p2, n); if (esl_vec_DLogValidate(p2, n, 1e-12, NULL) != eslOK) esl_fatal(msg); esl_vec_DExp(p2, n); if (p2[0] != 0.) esl_fatal(msg); esl_vec_FLog(p2f, n); if (esl_vec_FLogValidate(p2f, n, 1e-7, NULL) != eslOK) esl_fatal(msg); esl_vec_FExp(p2f, n); if (p2f[0] != 0.) 
esl_fatal(msg); esl_vec_DCopy(p2, n, p3); esl_vec_DScale(p3, n, 10.); esl_vec_DNorm(p3, n); if (esl_vec_DCompare(p2, p3, n, 1e-12) != eslOK) esl_fatal(msg); esl_vec_DLog(p3, n); result = esl_vec_DLogSum(p3, n); if (esl_DCompare(0.0, result, 1e-12) != eslOK) esl_fatal(msg); esl_vec_DIncrement(p3, n, 2.0); esl_vec_DLogNorm(p3, n); if (esl_vec_DCompare(p2, p3, n, 1e-12) != eslOK) esl_fatal(msg); esl_vec_FCopy(p2f, n, p3f); esl_vec_FScale(p3f, n, 10.); esl_vec_FNorm(p3f, n); if (esl_vec_FCompare(p2f, p3f, n, 1e-7) != eslOK) esl_fatal(msg); esl_vec_FLog(p3f, n); result = esl_vec_FLogSum(p3f, n); if (esl_DCompare(0.0, result, 1e-7) != eslOK) esl_fatal(msg); esl_vec_FIncrement(p3f, n, 2.0); esl_vec_FLogNorm(p3f, n); if (esl_vec_FCompare(p2f, p3f, n, 1e-7) != eslOK) esl_fatal(msg); return; }
/* Function: p7_prior_CreateNucleicNew() * Incept: TJW, Thu Nov 12 21:15:11 EST 2009 [Couch at home] * * Purpose: Creates the default mixture Dirichlet prior for nucleotiden * sequences. * * The transition priors (match, insert, delete) are all * single Dirichlets, originally trained by Graeme * Mitchison in the mid-1990's. Notes have been lost, but * we believe they were trained on an early version of * Pfam. * * The match emission prior is an eight-component mixture * trained against a portion of the rmark dataset * * The insert emission prior is a single Dirichlet with * high $|\alpha|$, such that insert emission probabilities * are essentially fixed by the prior, regardless of * observed count data. * * Returns: a pointer to the new <P7_PRIOR> structure. */ P7_PRIOR * p7_prior_CreateNucleic(void) { int status; P7_PRIOR *pri = NULL; int q; /* Match emission priors are trained on rmark database [Nawrocki 08] */ /* Plus-1 Laplace prior int num_comp = 1; static double defmq[2] = { 1.0 }; static double defm[1][4] = { { 1.0, 1.0, 1.0, 1.0} // }; */ /* int num_comp = 2; static double defmq[2] = { 0.42, 0.58 }; static double defm[2][4] = { { 0.94, 0.90, 0.89, 1.13}, // { 0.096, 0.078, 0.093, 0.089} // }; */ /* //weird - but this performs marginally better than the best 2- 5- or 8-component mixtures tested // (on rmark - MER: 2 better than 5/8-comp , 3 better than 2-comp ) int num_comp = 4; static double defmq[4] = { 0.16, 0.29, 0.12, 0.43 }; static double defm[4][4] = { { 0.36, 0.10, 5.3, 0.13}, // G { 0.05, 0.18, 0.03, 0.19}, // CT { 7.1, 0.13, 0.35, 0.17}, // A { 0.96, 0.92, 0.91, 1.19} // uniform }; */ /*On rmark, this model does only slightly better than the 2-component model It's chosen as the default on grounds of reasonableness, given that it shows a non-uniform transition:transversion ratio. It's based on the results of training against a portion of rmark, but the overspecified numbers resulting from that training have been rounded/simplified. 
*/ int num_comp = 5; static double defmq[5] = { 0.16, 0.13, 0.17, 0.15, 0.39 }; static double defm[5][4] = { { 6.0, 0.2, 0.5, 0.2}, // A { 0.2, 8.0, 0.2, 0.5}, // C { 0.5, 0.2, 8.0, 0.2}, // G { 0.2, 0.5, 0.2, 4.0}, // T { 1.3, 1.2, 1.2, 1.4} // uniform }; /* gives no improved performance in my hands over the 5-component model int num_comp = 8; static double defmq[8] = { 0.13, 0.08, 0.08, 0.13, 0.08, 0.08, 0.17, 0.25 } ; static double defm[8][4] = { { 4.0, 0.3, 0.5, 0.4}, // A { 0.3, 22.0, 0.3, 0.8}, // C { 1.0, 0.4, 28.0, 0.4}, // G { 0.5, 0.8, 0.3, 6.0}, // T { 1.8, 0.8, 6.0, 1.0}, // AG { 0.6, 6.0, 0.6, 2.4}, // CT { 0.03, 0.01, 0.02, 0.02}, // anything, but highly conserved { 2.0, 2.0, 2.0, 2.0} // anything, not much conservation }; */ ESL_ALLOC(pri, sizeof(P7_PRIOR)); pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL; pri->tm = esl_mixdchlet_Create(1, 3); // match transitions; single component; 3 params pri->ti = esl_mixdchlet_Create(1, 2); // insert transitions; single component; 2 params pri->td = esl_mixdchlet_Create(1, 2); // delete transitions; single component; 2 params pri->em = esl_mixdchlet_Create(num_comp, 4); // match emissions; X component; 4 params pri->ei = esl_mixdchlet_Create(1, 4); // insert emissions; single component; 4 params if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR; /* Transition priors: roughly, learned from rmark benchmark - hand-beautified (trimming overspecified significant digits) */ pri->tm->pq[0] = 1.0; pri->tm->alpha[0][0] = 2.0; // TMM pri->tm->alpha[0][1] = 0.1; // TMI pri->tm->alpha[0][2] = 0.1; // TMD pri->ti->pq[0] = 1.0; pri->ti->alpha[0][0] = 0.06; // TIM pri->ti->alpha[0][1] = 0.2; // TII pri->td->pq[0] = 1.0; pri->td->alpha[0][0] = 0.1; // TDM pri->td->alpha[0][1] = 0.2; // TDD /* Match emission priors */ for (q = 0; q < num_comp; q++) { pri->em->pq[q] = defmq[q]; esl_vec_DCopy(defm[q], 4, pri->em->alpha[q]); } /* Insert emission priors. 
Should that alphas be lower? higher? */ pri->ei->pq[0] = 1.0; esl_vec_DSet(pri->ei->alpha[0], 4, 1.0); return pri; ERROR: if (pri != NULL) p7_prior_Destroy(pri); return NULL; }
/* Function: esl_min_ConjugateGradientDescent() * Incept: SRE, Wed Jun 22 08:49:42 2005 [St. Louis] * * Purpose: n-dimensional minimization by conjugate gradient descent. * * An initial point is provided by <x>, a vector of <n> * components. The caller also provides a function <*func()> that * compute the objective function f(x) when called as * <(*func)(x, n, prm)>, and a function <*dfunc()> that can * compute the gradient <dx> at <x> when called as * <(*dfunc)(x, n, prm, dx)>, given an allocated vector <dx> * to put the derivative in. Any additional data or fixed * parameters that these functions require are passed by * the void pointer <prm>. * * The first step of each iteration is to try to bracket * the minimum along the current direction. The initial step * size is controlled by <u[]>; the first step will not exceed * <u[i]> for any dimension <i>. (You can think of <u> as * being the natural "units" to use along a graph axis, if * you were plotting the objective function.) * * The caller also provides an allocated workspace sufficient to * hold four allocated n-vectors. (4 * sizeof(double) * n). * * Iterations continue until the objective function has changed * by less than a fraction <tol>. This should not be set to less than * sqrt(<DBL_EPSILON>). * * Upon return, <x> is the minimum, and <ret_fx> is f(x), * the function value at <x>. * * Args: x - an initial guess n-vector; RETURN: x at the minimum * u - "units": maximum initial step size along gradient when bracketing. * n - dimensionality of all vectors * *func() - function for computing objective function f(x) * *dfunc() - function for computing a gradient at x * prm - void ptr to any data/params func,dfunc need * tol - convergence criterion applied to f(x) * wrk - allocated 4xn-vector for workspace * ret_fx - optRETURN: f(x) at the minimum * * Returns: <eslOK> on success. * * Throws: <eslENOHALT> if it fails to converge in MAXITERATIONS. 
* <eslERANGE> if the minimum is not finite, which may * indicate a problem in the implementation or choice of <*func()>. * * Xref: STL9/101. */ int esl_min_ConjugateGradientDescent(double *x, double *u, int n, double (*func)(double *, int, void *), void (*dfunc)(double *, int, void *, double *), void *prm, double tol, double *wrk, double *ret_fx) { double oldfx; double coeff; int i, i1; double *dx, *cg, *w1, *w2; double cvg; double fa,fb,fc; double ax,bx,cx; double fx; dx = wrk; cg = wrk + n; w1 = wrk + 2*n; w2 = wrk + 3*n; oldfx = (*func)(x, n, prm); /* init the objective function */ /* Bail out if the function is +/-inf: this can happen if the caller * has screwed something up, or has chosen a bad start point. */ if (oldfx == eslINFINITY || oldfx == -eslINFINITY) ESL_EXCEPTION(eslERANGE, "minimum not finite"); if (dfunc != NULL) { (*dfunc)(x, n, prm, dx); /* find the current negative gradient, - df(x)/dxi */ esl_vec_DScale(dx, n, -1.0); } else numeric_derivative(x, u, n, func, prm, 1e-4, dx); /* resort to brute force */ esl_vec_DCopy(dx, n, cg); /* and make that the first conjugate direction, cg */ /* (failsafe) convergence test: a zero direction can happen, * and it either means we're stuck or we're finished (most likely stuck) */ for (i1 = 0; i1 < n; i1++) if (cg[i1] != 0.) break; if (i1 == n) { if (ret_fx != NULL) *ret_fx = oldfx; return eslOK; } for (i = 0; i < MAXITERATIONS; i++) { /* Figure out the initial step size. */ bx = fabs(u[0] / cg[0]); for (i1 = 1; i1 < n; i1++) { cx = fabs(u[i1] / cg[i1]); if (cx < bx) bx = cx; } /* Bracket the minimum. */ bracket(x, cg, n, bx, func, prm, w1, &ax, &bx, &cx, &fa, &fb, &fc); /* Minimize along the line given by the conjugate gradient <cg> */ brent(x, cg, n, func, prm, ax, cx, 1e-3, 1e-8, w2, NULL, &fx); esl_vec_DCopy(w2, n, x); /* Bail out if the function is now +/-inf: this can happen if the caller * has screwed something up. 
*/ if (fx == eslINFINITY || fx == -eslINFINITY) ESL_EXCEPTION(eslERANGE, "minimum not finite"); /* Find the negative gradient at that point (temporarily in w1) */ if (dfunc != NULL) { (*dfunc)(x, n, prm, w1); esl_vec_DScale(w1, n, -1.0); } else numeric_derivative(x, u, n, func, prm, 1e-4, w1); /* resort to brute force */ /* Calculate the Polak-Ribiere coefficient */ for (coeff = 0., i1 = 0; i1 < n; i1++) coeff += (w1[i1] - dx[i1]) * w1[i1]; coeff /= esl_vec_DDot(dx, dx, n); /* Calculate the next conjugate gradient direction in w2 */ esl_vec_DCopy(w1, n, w2); esl_vec_DAddScaled(w2, cg, coeff, n); /* Finishing set up for next iteration: */ esl_vec_DCopy(w1, n, dx); esl_vec_DCopy(w2, n, cg); /* Now: x is the current point; * fx is the function value at that point; * dx is the current gradient at x; * cg is the current conjugate gradient direction. */ /* Main convergence test. 1e-9 factor is fudging the case where our * minimum is at exactly f()=0. */ cvg = 2.0 * fabs((oldfx-fx)) / (1e-10 + fabs(oldfx) + fabs(fx)); // fprintf(stderr, "(%d): Old f() = %.9f New f() = %.9f Convergence = %.9f\n", i, oldfx, fx, cvg); // fprintf(stdout, "(%d): Old f() = %.9f New f() = %.9f Convergence = %.9f\n", i, oldfx, fx, cvg); #if eslDEBUGLEVEL >= 2 printf("\nesl_min_ConjugateGradientDescent():\n"); printf("new point: "); for (i1 = 0; i1 < n; i1++) printf("%g ", x[i1]); printf("\nnew gradient: "); for (i1 = 0; i1 < n; i1++) printf("%g ", dx[i1]); numeric_derivative(x, u, n, func, prm, 1e-4, w1); printf("\n(numeric grad): "); for (i1 = 0; i1 < n; i1++) printf("%g ", w1[i1]); printf("\nnew direction: "); for (i1 = 0; i1 < n; i1++) printf("%g ", cg[i1]); printf("\nOld f() = %g New f() = %g Convergence = %g\n\n", oldfx, fx, cvg); #endif if (cvg <= tol) break; /* Second (failsafe) convergence test: a zero direction can happen, * and it either means we're stuck or we're finished (most likely stuck) */ for (i1 = 0; i1 < n; i1++) if (cg[i1] != 0.) 
break; if (i1 == n) break; oldfx = fx; } if (ret_fx != NULL) *ret_fx = fx; if (i == MAXITERATIONS) ESL_FAIL(eslENOHALT, NULL, " "); // ESL_EXCEPTION(eslENOHALT, "Failed to converge in ConjugateGradientDescent()"); return eslOK; }
/* brent(): * SRE, Sun Jul 10 19:07:05 2005 [St. Louis] * * Purpose: Quasi-one-dimensional minimization of a function <*func()> * in <n>-dimensions, along vector <dir> starting from a * point <ori>. Identifies a scalar $x$ that approximates * the position of the minimum along this direction, in a * given bracketing interval (<a,b>). The minimum must * have been bracketed by the caller in the <(a,b)> * interval. <a> is often 0, because we often start at the * <ori>. * * A quasi-1D scalar coordinate $x$ (such as <a> or <b>) is * transformed to a point $\mathbf{p}$ in n-space as: * $\mathbf{p} = \mathbf{\mbox{ori}} + x * \mathbf{\mbox{dir}}$. * * Any extra (fixed) data needed to calculate <func> can be * passed through the void <prm> pointer. * * <eps> and <t> define the relative convergence tolerance, * $\mbox{tol} = \mbox{eps} |x| + t$. <eps> should not be * less than the square root of the machine precision. The * <DBL_EPSILON> is 2.2e-16 on many machines with 64-bit * doubles, so <eps> is on the order of 1e-8 or more. <t> * is a yet smaller number, used to avoid nonconvergence in * the pathological case $x=0$. * * Upon convergence (which is guaranteed), returns <xvec>, * the n-dimensional minimum. Optionally, will also return * <ret_x>, the scalar <x> that resulted in that * n-dimensional minimum, and <ret_fx>, the objective * function <*func(x)> at the minimum. * * This is an implementation of the R.P. Brent (1973) * algorithm for one-dimensional minimization without * derivatives (modified from Brent's ALGOL60 code). Uses a * combination of bisection search and parabolic * interpolation; should exhibit superlinear convergence in * most functions. 
* * * Args: ori - n-vector at origin * dir - direction vector (gradient) we're following from ori * n - dimensionality of ori, dir, and xvec * (*func) - ptr to caller's objective function * prm - ptr to any additional data (*func)() needs * a,b - minimum is bracketed on interval [a,b] * eps - tol = eps |x| + t; eps >= 2 * relative machine precision * t - additional factor for tol to avoid x=0 case. * xvec - RETURN: minimum, as an n-vector (caller allocated) * ret_x - optRETURN: scalar multiplier that gave xvec * ret_fx - optRETURN: f(x) * * Returns: (void) * * Reference: See [Brent73], Chapter 5. My version is derived directly * from Brent's description and his ALGOL60 code. I've * preserved his variable names as much as possible, to * make the routine follow his published description * closely. The Brent algorithm is also discussed in * Numerical Recipes [Press88]. */ static void brent(double *ori, double *dir, int n, double (*func)(double *, int, void *), void *prm, double a, double b, double eps, double t, double *xvec, double *ret_x, double *ret_fx) { double w,x,v,u; /* with [a,b]: Brent's six points */ double m; /* midpoint of current [a,b] interval */ double tol; /* tolerance = eps|x| + t */ double fu,fv,fw,fx; /* function evaluations */ double p,q; /* numerator, denominator of parabolic interpolation */ double r; double d,e; /* last, next-to-last values of p/q */ double c = 1. - (1./eslCONST_GOLD); /* Brent's c; 0.381966; golden ratio */ int niter; /* number of iterations */ x=v=w= a + c*(b-a); /* initial guess of x by golden section */ esl_vec_DCopy(ori, n, xvec); /* build xvec from ori, dir, x */ esl_vec_DAddScaled(xvec, dir, x, n); fx=fv=fw = (*func)(xvec, n, prm); /* initial function evaluation */ e = 0.; niter = 0; while (1) /* algorithm is guaranteed to converge. */ { m = 0.5 * (a+b); tol = eps*fabs(x) + t; if (fabs(x-m) <= 2*tol - 0.5*(b-a)) break; /* convergence test. 
*/ niter++; p = q = r = 0.; if (fabs(e) > tol) { /* Compute parabolic interpolation, u = x + p/q */ r = (x-w)*(fx-fv); q = (x-v)*(fx-fw); p = (x-v)*q - (x-w)*r; q = 2*(q-r); if (q > 0) { p = -p; } else {q = -q;} r = e; e=d; /* e is now the next-to-last p/q */ } if (fabs(p) < fabs(0.5*q*r) || p < q*(a-x) || p < q*(b-x)) { /* Seems well-behaved? Use parabolic interpolation to compute new point u */ d = p/q; /* d remembers last p/q */ u = x+d; /* trial point, for now... */ if (2.0*(u-a) < tol || 2.0*(b-u) < tol) /* don't evaluate func too close to a,b */ d = (x < m)? tol : -tol; } else /* Badly behaved? Use golden section search to compute u. */ { e = (x<m)? b-x : a-x; /* e = largest interval */ d = c*e; } /* Evaluate f(), but not too close to x. */ if (fabs(d) >= tol) u = x+d; else if (d > 0) u = x+tol; else u = x-tol; esl_vec_DCopy(ori, n, xvec); /* build xvec from ori, dir, u */ esl_vec_DAddScaled(xvec, dir, u, n); fu = (*func)(xvec, n, prm); /* f(u) */ /* Bookkeeping. */ if (fu <= fx) { if (u < x) b = x; else a = x; v = w; fv = fw; w = x; fw = fx; x = u; fx = fu; } else { if (u < x) a = u; else b = u; if (fu <= fw || w == x) { v = w; fv = fw; w = u; fw = fu; } else if (fu <= fv || v==x || v ==w) { v = u; fv = fu; } } } /* Return. */ esl_vec_DCopy(ori, n, xvec); /* build final xvec from ori, dir, x */ esl_vec_DAddScaled(xvec, dir, x, n); if (ret_x != NULL) *ret_x = x; if (ret_fx != NULL) *ret_fx = fx; ESL_DPRINTF2(("\nbrent(): %d iterations\n", niter)); ESL_DPRINTF2(("xx=%10.8f fx=%10.1f\n", x, fx)); }
/* bracket(): * SRE, Wed Jul 27 11:43:32 2005 [St. Louis] * * Purpose: Bracket a minimum. * * The minimization is quasi-one-dimensional, * starting from an initial <n>-dimension vector <ori> * in the <n>-dimensional direction <d>. * * Caller passes a ptr to the objective function <*func()>, * and a void pointer to any necessary conditional * parameters <prm>. The objective function will * be evaluated at a point <x> by calling * <(*func)(x, n, prm)>. The caller's function * is responsible to casting <prm> to whatever it's * supposed to be, which might be a ptr to a structure, * for example; typically, for a parameter optimization * problem, this holds the observed data. * * The routine works in scalar multipliers relative * to origin <ori> and direction <d>; that is, a new <n>-dimensional * point <b> is defined as <ori> + <bx><d>, for a scalar <bx>. * * The routine identifies a triplet <ax>, <bx>, <cx> such * that $a < b < c$ and such that a minimum is known to * exist in the $(a,b)$ interval because $f(b) < f(a), * f(c)$. Also, the <a..b> and <b...c> intervals are in * a golden ratio; the <b..c> interval is 1.618 times larger * than <a..b>. * * Since <d> is usually in the direction of the gradient, * the points <ax>,<bx>,<cx> might be expected to be $\geq 0$; * however, when <ori> is already close to the minimum, * it is often faster to bracket the minimum using * a negative <ax>. The caller might then try to be "clever" * and assume that the minimum is in the <bx..cx> interval * when <ax> is negative, rather than the full <ax..cx> * interval. That cleverness can fail, though, if <ori> * is already in fact the minimum, because the line minimizer * in brent() assumes a non-inclusive interval. Use * <ax..cx> as the bracket. 
* * Args: ori - n-dimensional starting vector * d - n-dimensional direction to minimize along * n - # of dimensions * firststep - bx is initialized to this scalar multiplier * *func() - objective function to minimize * prm - void * to any constant data that *func() needs * wrk - workspace: 1 allocated n-dimensional vector * ret_ax - RETURN: ax < bx < cx scalar bracketing triplet * ret_bx - RETURN: ...ax may be negative * ret_cx - RETURN: * ret_fa - RETURN: function evaluated at a,b,c * ret_fb - RETURN: ... f(b) < f(a),f(c) * ret_fc - RETURN: * * Returns: <eslOK> on success. * * Throws: <eslENOHALT> if it fails to converge. * * Xref: STL9/130. */ static int bracket(double *ori, double *d, int n, double firststep, double (*func)(double *, int, void *), void *prm, double *wrk, double *ret_ax, double *ret_bx, double *ret_cx, double *ret_fa, double *ret_fb, double *ret_fc) { double ax,bx,cx; /* scalar multipliers */ double fa,fb,fc; /* f() evaluations at those points */ double swapper; int niter; /* Set and evaluate our first two points f(a) and f(b), which * are initially at 0.0 and <firststep>. */ ax = 0.; /* always start w/ ax at the origin, ax=0 */ fa = (*func)(ori, n, prm); bx = firststep; esl_vec_DCopy(ori, n, wrk); esl_vec_DAddScaled(wrk, d, bx, n); fb = (*func)(wrk, n, prm); /* In principle, we usually know that the minimum m lies to the * right of a, m>=a, because d is likely to be a gradient. You * might think we want 0 = a < b < c. In practice, there's problems * with that. It's far easier to identify bad points (f(x) > f(a)) * than to identify good points (f(x) < f(a)), because letting f(x) * blow up to infinity is fine as far as bracketing is concerned. * It can be almost as hard to identify a point b that f(b) < f(a) * as it is to find the minimum in the first place! * Counterintuitively, in cases where f(b)>f(a), it's better * to just swap the a,b labels and look for c on the wrong side * of a! 
This often works immediately, if f(a) was reasonably * close to the minimum and f(b) and f(c) are both terrible. */ if (fb > fa) { swapper = ax; ax = bx; bx = swapper; swapper = fa; fa = fb; fb = swapper; } /* Make our first guess at c. * Remember, we don't know that b>a any more, and c might go negative. * We'll either have: a..b...c with a=0; * or: c...b..a with b=0. * In many cases, we'll immediately be done. */ cx = bx + (bx-ax)*1.618; esl_vec_DCopy(ori, n, wrk); esl_vec_DAddScaled(wrk, d, cx, n); fc = (*func)(wrk, n, prm); /* We're not satisfied until fb < fa, fc; * throughout the routine, we guarantee that fb < fa; * so we just check fc. */ niter = 0; while (fc <= fb) { /* Slide over, discarding the a point; choose * new c point even further away. */ ax = bx; bx = cx; fa = fb; fb = fc; cx = bx+(bx-ax)*1.618; esl_vec_DCopy(ori, n, wrk); esl_vec_DAddScaled(wrk, d, cx, n); fc = (*func)(wrk, n, prm); /* This is a rare instance. We've reach the minimum * by trying to bracket it. Also check that not all * three points are the same. */ if (ax != bx && bx != cx && fa == fb && fb == fc) break; niter++; if (niter > 100) ESL_EXCEPTION(eslENORESULT, "Failed to bracket a minimum."); } /* We're about to return. Assure the caller that the points * are in order a < b < c, not the other way. */ if (ax > cx) { swapper = ax; ax = cx; cx = swapper; swapper = fa; fa = fc; fc = swapper; } /* Return. */ ESL_DPRINTF2(("\nbracket(): %d iterations\n", niter)); ESL_DPRINTF2(("bracket(): triplet is %g %g %g along current direction\n", ax, bx, cx)); ESL_DPRINTF2(("bracket(): f()'s there are: %g %g %g\n\n", fa, fb, fc)); *ret_ax = ax; *ret_bx = bx; *ret_cx = cx; *ret_fa = fa; *ret_fb = fb; *ret_fc = fc; return eslOK; }
/* dump_infocontent_info
 *
 * Given per-column residue counts for an MSA (optionally with RF
 * annotation), dump information content per column data to an open
 * output file.
 *
 * For each alignment column the output has the 1-based alignment
 * position, the fraction of non-gap counts, and the information
 * content: the entropy of a uniform distribution over the <abc->K>
 * residues minus the entropy of the column's normalized residue
 * counts. When <i_am_rf> is non-NULL, a leading field gives the
 * 1-based reference position for columns flagged in <i_am_rf>, or "-"
 * for the others.
 *
 * Args:   fp          - open output stream
 *         abc         - alphabet; only abc->K is used here
 *         abc_ct      - [0..alen-1][0..K] counts per column; element
 *                       [K] is used as the count that is not a
 *                       residue (presumably gaps - confirm against
 *                       the caller that fills it)
 *         use_weights - TRUE if counts were weighted; only affects
 *                       the header text
 *         nali        - alignment index, for the header
 *         alen        - number of alignment columns
 *         nseq        - number of sequences, for the header
 *         i_am_rf     - NULL, or [0..alen-1] flags marking reference columns
 *         msa_name    - NULL, or alignment name for the header
 *         alifile     - alignment file name, for the header
 *         errbuf      - NULL, or buffer for an error message
 *
 * Returns: <eslOK> on success. On allocation failure, returns the
 *          status set by ESL_ALLOC (<eslEMEM> by Easel convention)
 *          with "out of memory" in <errbuf>.
 *
 * Note:   a column whose counts are all zero is not special-cased:
 *         the non-gap fraction is then 0/0.
 */
static int
dump_infocontent_info(FILE *fp, ESL_ALPHABET *abc, double **abc_ct, int use_weights, int nali, int64_t alen, int nseq, int *i_am_rf, char *msa_name, char *alifile, char *errbuf)
{
  int     status;
  int     apos, rfpos;
  double  bg_ent;		/* entropy of the uniform background over K residues */
  double *bg       = NULL;
  double *abc_freq = NULL;	/* normalized residue counts of the current column   */
  double  nnongap;

  /* Background entropy, from an explicit uniform vector. */
  ESL_ALLOC(bg, sizeof(double) * abc->K);
  esl_vec_DSet(bg, abc->K, 1./(abc->K));
  bg_ent = esl_vec_DEntropy(bg, abc->K);
  free(bg);
  bg = NULL;			/* nothing left to free if the next allocation fails */

  ESL_ALLOC(abc_freq, sizeof(double) * abc->K);

  /* Header. */
  fprintf(fp, "# Information content per column (bits):\n");
  fprintf(fp, "# Alignment file: %s\n", alifile);
  fprintf(fp, "# Alignment idx: %d\n", nali);
  if(msa_name != NULL) { fprintf(fp, "# Alignment name: %s\n", msa_name); }
  fprintf(fp, "# Number of sequences: %d\n", nseq);
  if(use_weights) { fprintf(fp, "# IMPORTANT: Counts are weighted based on sequence weights in alignment file.\n"); }
  else            { fprintf(fp, "# Sequence weights from alignment were ignored (if they existed).\n"); }
  fprintf(fp, "#\n");

  if(i_am_rf != NULL) {
    fprintf(fp, "# %7s %7s %10s %10s\n", "rfpos", "alnpos", "freqnongap", "info(bits)");
    fprintf(fp, "# %7s %7s %10s %10s\n", "-------", "-------", "----------", "----------");
  }
  else {
    fprintf(fp, "# %7s %10s %10s\n", "alnpos", "freqnongap", "info(bits)");
    fprintf(fp, "# %7s %10s %10s\n", "-------", "----------", "----------");
  }

  /* One line per alignment column. */
  rfpos = 0;
  for(apos = 0; apos < alen; apos++) {
    if(i_am_rf != NULL) {
      if(i_am_rf[apos]) {
	fprintf(fp, " %7d", rfpos+1);
	rfpos++;
      }
      else {
	fprintf(fp, " %7s", "-");
      }
    }
    nnongap = esl_vec_DSum(abc_ct[apos], abc->K);
    esl_vec_DCopy(abc_ct[apos], abc->K, abc_freq);
    esl_vec_DNorm(abc_freq, abc->K);
    fprintf(fp, " %7d %10.8f %10.8f\n", apos+1,
	    nnongap / (nnongap + abc_ct[apos][abc->K]),
	    (bg_ent - esl_vec_DEntropy(abc_freq, abc->K)));
  }
  fprintf(fp, "//\n");

  free(abc_freq);
  return eslOK;

 ERROR:
  /* Only reached from a failed ESL_ALLOC: report the allocator's own
   * status rather than eslEINVAL, so the code matches the message.
   */
  ESL_FAIL(status, errbuf, "out of memory");
  return status; /* NEVERREACHED */
}