Esempio n. 1
0
/* Function:  esl_vec_DLogValidate()
 * Synopsis:  Verify that vector is a log-p-vector.           
 * Incept:    ER,  Tue Dec  5 09:46:51 EST 2006 [janelia]
 *
 * Purpose:   Validate a log probability vector <vec> of length <n>.
 *            The exp of each element has to be between 0 and 1, and
 *            the sum of all elements has to be 1.
 *
 * Args:      v      - log p vector to validate.
 *            n      - dimensionality of v
 *            tol    - convergence criterion applied to sum of exp v
 *            errbuf - NULL, or a failure message buffer allocated
 *                     for at least p7_ERRBUFSIZE chars. 
 *
 * Returns:   <eslOK> on success, or <eslFAIL> on failure; upon failure,
 *            if caller provided a non-<NULL> <errbuf>, an informative
 *            message is left there.
 *            
 * Throws:    <eslEMEM> on allocation failure.           
 */
int
esl_vec_DLogValidate(double *vec, int n, double tol, char *errbuf)
{
  int     status;
  double *expvec = NULL;

  if (errbuf) *errbuf = 0;
  if (n == 0) return eslOK;

  ESL_ALLOC(expvec, sizeof(double)*n);
  esl_vec_DCopy(vec, n, expvec);
  esl_vec_DExp(expvec, n); 
  if ((status = esl_vec_DValidate(expvec, n, tol, errbuf)) != eslOK) goto ERROR;
  free(expvec);
  return eslOK;

 ERROR:
  if (expvec != NULL) free(expvec);
  return status;
}
Esempio n. 2
0
/* Function:  p7_prior_CreateAmino()
 * Incept:    SRE, Sat Mar 24 09:35:36 2007 [Janelia]
 *
 * Purpose:   Creates the default mixture Dirichlet prior for protein
 *            sequences.
 *
 *            The transition priors (match, insert, delete) are all
 *            single Dirichlets, originally trained by Graeme
 *            Mitchison in the mid-1990's. Notes have been lost, but
 *            we believe they were trained on an early version of
 *            Pfam. 
 *            
 *            The match emission prior is a nine-component mixture
 *            from Kimmen Sjolander, who trained it on the Blocks9
 *            database \citep{Sjolander96}.
 *            
 *            The insert emission prior is a single Dirichlet with
 *            high $|\alpha|$, such that insert emission probabilities
 *            are essentially fixed by the prior, regardless of
 *            observed count data. The slightly polar parameterization
 *            was obtained by training on Pfam 1.0.
 *
 * Returns:   a pointer to the new <P7_PRIOR> structure.
 */
P7_PRIOR *
p7_prior_CreateAmino(void)
{
  int status;
  P7_PRIOR *pri = NULL;
  int q;
				/* default match mixture coefficients: [Sjolander96] */
  static double defmq[9] = {
    0.178091, 0.056591, 0.0960191, 0.0781233, 0.0834977, 
    0.0904123, 0.114468, 0.0682132, 0.234585 };

				/* default match mixture Dirichlet components [Sjolander96] */
  static double defm[9][20] = {
    { 0.270671, 0.039848, 0.017576, 0.016415, 0.014268, 
      0.131916, 0.012391, 0.022599, 0.020358, 0.030727, 
      0.015315, 0.048298, 0.053803, 0.020662, 0.023612,
      0.216147, 0.147226, 0.065438, 0.003758, 0.009621 },
    { 0.021465, 0.010300, 0.011741, 0.010883, 0.385651, 
      0.016416, 0.076196, 0.035329, 0.013921, 0.093517, 
      0.022034, 0.028593, 0.013086, 0.023011, 0.018866, 
      0.029156, 0.018153, 0.036100, 0.071770, 0.419641 },
    { 0.561459, 0.045448, 0.438366, 0.764167, 0.087364,
      0.259114, 0.214940, 0.145928, 0.762204, 0.247320,
      0.118662, 0.441564, 0.174822, 0.530840, 0.465529, 
      0.583402, 0.445586, 0.227050, 0.029510, 0.121090 },
    { 0.070143, 0.011140, 0.019479, 0.094657, 0.013162, 
      0.048038, 0.077000, 0.032939, 0.576639, 0.072293, 
      0.028240, 0.080372, 0.037661, 0.185037, 0.506783, 
      0.073732, 0.071587, 0.042532, 0.011254, 0.028723 },
    { 0.041103, 0.014794, 0.005610, 0.010216, 0.153602, 
      0.007797, 0.007175, 0.299635, 0.010849, 0.999446, 
      0.210189, 0.006127, 0.013021, 0.019798, 0.014509, 
      0.012049, 0.035799, 0.180085, 0.012744, 0.026466 },
    { 0.115607, 0.037381, 0.012414, 0.018179, 0.051778, 
      0.017255, 0.004911, 0.796882, 0.017074, 0.285858, 
      0.075811, 0.014548, 0.015092, 0.011382, 0.012696, 
      0.027535, 0.088333, 0.944340, 0.004373, 0.016741 },
    { 0.093461, 0.004737, 0.387252, 0.347841, 0.010822, 
      0.105877, 0.049776, 0.014963, 0.094276, 0.027761, 
      0.010040, 0.187869, 0.050018, 0.110039, 0.038668, 
      0.119471, 0.065802, 0.025430, 0.003215, 0.018742 },
    { 0.452171, 0.114613, 0.062460, 0.115702, 0.284246,
      0.140204, 0.100358, 0.550230, 0.143995, 0.700649, 
      0.276580, 0.118569, 0.097470, 0.126673, 0.143634, 
      0.278983, 0.358482, 0.661750, 0.061533, 0.199373 },
    { 0.005193, 0.004039, 0.006722, 0.006121, 0.003468, 
      0.016931, 0.003647, 0.002184, 0.005019, 0.005990, 
      0.001473, 0.004158, 0.009055, 0.003630, 0.006583, 
      0.003172, 0.003690, 0.002967, 0.002772, 0.002686 },
  };

  ESL_ALLOC(pri, sizeof(P7_PRIOR));
  pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;

  pri->tm = esl_mixdchlet_Create(1, 3);	 /* single component; 3 params */
  pri->ti = esl_mixdchlet_Create(1, 2);	 /* single component; 2 params */
  pri->td = esl_mixdchlet_Create(1, 2);	 /* single component; 2 params */
  pri->em = esl_mixdchlet_Create(9, 20); /* 9 component; 20 params */
  pri->ei = esl_mixdchlet_Create(1, 20); /* single component; 20 params */


  if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;

  /* Transition priors: originally from Graeme Mitchison. Notes are lost, but we believe
   * they were trained on an early version of Pfam. 
   */
  pri->tm->pq[0]       = 1.0;
  pri->tm->alpha[0][0] = 0.7939; /* TMM */
  pri->tm->alpha[0][1] = 0.0278; /* TMI */ /* Markus suggests ~10x MD, ~0.036; test! */
  pri->tm->alpha[0][2] = 0.0135; /* TMD */ /* Markus suggests 0.1x MI, ~0.004; test! */

  pri->ti->pq[0]       = 1.0;
  pri->ti->alpha[0][0] = 0.1551; /* TIM */
  pri->ti->alpha[0][1] = 0.1331; /* TII */

  pri->td->pq[0]       = 1.0;
  pri->td->alpha[0][0] = 0.9002; /* TDM */
  pri->td->alpha[0][1] = 0.5630; /* TDD */

  /* Match emission priors are from Kimmen Sjolander, trained
   * on the Blocks9 database. [Sjolander96]
   */  
  for (q = 0; q < 9; q++)
    {
      pri->em->pq[q] = defmq[q];
      esl_vec_DCopy(defm[q], 20, pri->em->alpha[q]);
    }

  /* Insert emission priors were trained on Pfam 1.0, 10 Nov 1996;
   *  see  ~/projects/plan7/InsertStatistics.
   * Inserts are slightly biased towards polar residues and away from
   * hydrophobic residues.
   */
  pri->ei->pq[0] = 1.0;
  pri->ei->alpha[0][0]  = 681.;         /* A */
  pri->ei->alpha[0][1]  = 120.;         /* C */
  pri->ei->alpha[0][2]  = 623.;         /* D */
  pri->ei->alpha[0][3]  = 651.;         /* E */
  pri->ei->alpha[0][4]  = 313.;         /* F */
  pri->ei->alpha[0][5]  = 902.;         /* G */
  pri->ei->alpha[0][6]  = 241.;         /* H */
  pri->ei->alpha[0][7]  = 371.;         /* I */
  pri->ei->alpha[0][8]  = 687.;         /* K */
  pri->ei->alpha[0][9]  = 676.;         /* L */
  pri->ei->alpha[0][10] = 143.;         /* M */
  pri->ei->alpha[0][11] = 548.;         /* N */
  pri->ei->alpha[0][12] = 647.;         /* P */
  pri->ei->alpha[0][13] = 415.;         /* Q */
  pri->ei->alpha[0][14] = 551.;         /* R */
  pri->ei->alpha[0][15] = 926.;         /* S */
  pri->ei->alpha[0][16] = 623.;         /* T */
  pri->ei->alpha[0][17] = 505.;         /* V */
  pri->ei->alpha[0][18] = 102.;         /* W */
  pri->ei->alpha[0][19] = 269.;         /* Y */

  return pri;

 ERROR:
  if (pri != NULL) p7_prior_Destroy(pri);
  return NULL;
}
Esempio n. 3
0
/* Function:  p7_prior_CreateNucleic()
 *
 * Purpose:   Creates the default mixture Dirichlet prior for nucleotide
 *            sequences.
 *
 *            The transition priors (match, insert, delete) are all
 *            single Dirichlets, trained on a portion of the rmark dataset
 *
 *            The match emission prior is an eight-component mixture
 *            trained against a portion of the rmark dataset
 *
 *            The insert emission prior is a single Dirichlet with
 *            high $|\alpha|$, such that insert emission probabilities
 *            are essentially fixed by the prior, regardless of
 *            observed count data.
 *
 * Returns:   a pointer to the new <P7_PRIOR> structure.
 */
P7_PRIOR *
p7_prior_CreateNucleic(void)
{
  int status;
  P7_PRIOR *pri = NULL;
  int q;

  /* Plus-1 Laplace prior
  int num_comp = 1;
  static double defmq[2] =  { 1.0  };
  static double defm[1][4] = {
         { 1.0, 1.0, 1.0, 1.0} //
  };
*/

  /* Match emission priors are trained on Rmark3 database
   * Xref: ~wheelert/notebook/2011/0325_nhmmer_new_parameters
   */
   int num_comp = 4;
   static double defmq[4] = { 0.24, 0.26, 0.08, 0.42  };
   static double defm[4][4] = {
         { 0.16,  0.45,  0.12,   0.39},
         { 0.09,  0.03,  0.09,   0.04},
         { 1.29,  0.40,  6.58,   0.51},
         { 1.74,  1.49,  1.57,   1.95}
	};



  ESL_ALLOC(pri, sizeof(P7_PRIOR));
  pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;


  pri->tm = esl_mixdchlet_Create(1, 3);  // match transitions; single component; 3 params
  pri->ti = esl_mixdchlet_Create(1, 2);  // insert transitions; single component; 2 params
  pri->td = esl_mixdchlet_Create(1, 2);  // delete transitions; single component; 2 params
  pri->em = esl_mixdchlet_Create(num_comp, 4); // match emissions; X component; 4 params
  pri->ei = esl_mixdchlet_Create(1, 4); // insert emissions; single component; 4 params


  if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;

  /* Transition priors: roughly, learned from rmark benchmark - hand-beautified (trimming overspecified significant digits)
   */
  pri->tm->pq[0]       = 1.0;
  pri->tm->alpha[0][0] = 2.0; // TMM
  pri->tm->alpha[0][1] = 0.1; // TMI
  pri->tm->alpha[0][2] = 0.1; // TMD


  pri->ti->pq[0]       = 1.0;
  pri->ti->alpha[0][0] = 0.06; // TIM
  pri->ti->alpha[0][1] = 0.2; // TII

  pri->td->pq[0]       = 1.0;
  pri->td->alpha[0][0] = 0.1; // TDM
  pri->td->alpha[0][1] = 0.2; // TDD



  /* Match emission priors  */
  for (q = 0; q < num_comp; q++)
    {
      pri->em->pq[q] = defmq[q];
      esl_vec_DCopy(defm[q], 4, pri->em->alpha[q]);
    }


  /* Insert emission priors. Should that alphas be lower? higher?
   */
  pri->ei->pq[0] = 1.0;
  esl_vec_DSet(pri->ei->alpha[0], 4, 1.0);

  return pri;

 ERROR:
  if (pri != NULL) p7_prior_Destroy(pri);
  return NULL;
}
Esempio n. 4
0
static void
utest_pvectors(void)
{
  char  *msg   = "pvector unit test failed";
  double p1[4] = { 0.25, 0.25, 0.25, 0.25 };
  double p2[4];
  double p3[4];
  float  p1f[4]; 
  float  p2f[4] = { 0.0,   0.5, 0.5,  0.0  };
  float  p3f[4];
  int    n = 4;
  double result;

  esl_vec_D2F(p1,  n, p1f);
  esl_vec_F2D(p2f, n, p2);  

  if (esl_vec_DValidate(p1,  n, 1e-12, NULL) != eslOK) esl_fatal(msg);
  if (esl_vec_FValidate(p1f, n, 1e-7,  NULL) != eslOK) esl_fatal(msg);

  result = esl_vec_DEntropy(p1,  n);          if (esl_DCompare(2.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_FEntropy(p1f, n);          if (esl_DCompare(2.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_DEntropy(p2,  n);          if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_FEntropy(p2f, n);          if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);

  result = esl_vec_DRelEntropy(p2,  p1,  n);  if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);
  result = esl_vec_FRelEntropy(p2f, p1f, n);  if (esl_DCompare(1.0, result, 1e-9) != eslOK) esl_fatal(msg);

  result = esl_vec_DRelEntropy(p1,  p2,  n);  if (result != eslINFINITY)  esl_fatal(msg);
  result = esl_vec_FRelEntropy(p1f, p2f, n);  if (result != eslINFINITY)  esl_fatal(msg);

  esl_vec_DLog(p2, n);
  if (esl_vec_DLogValidate(p2, n, 1e-12, NULL) != eslOK) esl_fatal(msg);
  esl_vec_DExp(p2, n);
  if (p2[0] != 0.) esl_fatal(msg);

  esl_vec_FLog(p2f, n);
  if (esl_vec_FLogValidate(p2f, n, 1e-7, NULL) != eslOK) esl_fatal(msg);
  esl_vec_FExp(p2f, n);
  if (p2f[0] != 0.) esl_fatal(msg);

  esl_vec_DCopy(p2, n, p3);
  esl_vec_DScale(p3, n, 10.);
  esl_vec_DNorm(p3, n);
  if (esl_vec_DCompare(p2, p3, n, 1e-12) != eslOK) esl_fatal(msg);

  esl_vec_DLog(p3, n);
  result = esl_vec_DLogSum(p3, n); if (esl_DCompare(0.0, result, 1e-12) != eslOK) esl_fatal(msg);
  esl_vec_DIncrement(p3, n, 2.0);
  esl_vec_DLogNorm(p3, n);
  if (esl_vec_DCompare(p2, p3, n, 1e-12) != eslOK) esl_fatal(msg);

  esl_vec_FCopy(p2f, n, p3f);
  esl_vec_FScale(p3f, n, 10.);
  esl_vec_FNorm(p3f, n);
  if (esl_vec_FCompare(p2f, p3f, n, 1e-7) != eslOK) esl_fatal(msg);

  esl_vec_FLog(p3f, n);
  result = esl_vec_FLogSum(p3f, n); if (esl_DCompare(0.0, result, 1e-7) != eslOK) esl_fatal(msg);
  esl_vec_FIncrement(p3f, n, 2.0);
  esl_vec_FLogNorm(p3f, n);
  if (esl_vec_FCompare(p2f, p3f, n, 1e-7) != eslOK) esl_fatal(msg);

  return;
}
Esempio n. 5
0
/* Function:  p7_prior_CreateNucleicNew()
 * Incept:    TJW, Thu Nov 12 21:15:11 EST 2009 [Couch at home]
 *
 * Purpose:   Creates the default mixture Dirichlet prior for nucleotiden
 *            sequences.
 *
 *            The transition priors (match, insert, delete) are all
 *            single Dirichlets, originally trained by Graeme
 *            Mitchison in the mid-1990's. Notes have been lost, but
 *            we believe they were trained on an early version of
 *            Pfam.
 *
 *            The match emission prior is an eight-component mixture
 *            trained against a portion of the rmark dataset
 *
 *            The insert emission prior is a single Dirichlet with
 *            high $|\alpha|$, such that insert emission probabilities
 *            are essentially fixed by the prior, regardless of
 *            observed count data.
 *
 * Returns:   a pointer to the new <P7_PRIOR> structure.
 */
P7_PRIOR *
p7_prior_CreateNucleic(void)
{
  int status;
  P7_PRIOR *pri = NULL;
  int q;


  /* Match emission priors are trained on rmark database [Nawrocki 08]
   */

  /* Plus-1 Laplace prior
  int num_comp = 1;
  static double defmq[2] =  { 1.0  };
  static double defm[1][4] = {
         { 1.0, 1.0, 1.0, 1.0} //
  };
*/
/*
  int num_comp = 2;
  static double defmq[2] =  { 0.42,  0.58   };
  static double defm[2][4] = {
         { 0.94,   0.90,   0.89,   1.13}, //
         { 0.096,  0.078,  0.093,   0.089} //
  };
*/
/*
   //weird - but this performs marginally better than the best 2- 5- or 8-component mixtures tested
  // (on rmark - MER: 2 better than 5/8-comp  , 3 better than 2-comp )
   int num_comp = 4;
   static double defmq[4] = { 0.16, 0.29, 0.12, 0.43  };
   static double defm[4][4] = {
         { 0.36,   0.10,   5.3,   0.13}, // G
         { 0.05,  0.18,  0.03,   0.19}, // CT
         { 7.1,  0.13,  0.35,   0.17}, // A
         { 0.96,  0.92,  0.91,   1.19} // uniform
	};
 */

   /*On rmark, this model does only slightly better than the 2-component model
     It's chosen as the default on grounds of reasonableness, given that it shows
     a non-uniform transition:transversion ratio. It's based on the results
     of training against a portion of rmark, but the overspecified numbers
     resulting from that training have been rounded/simplified.
   */
   int num_comp = 5;
   static double defmq[5] = { 0.16, 0.13, 0.17, 0.15, 0.39 };
   static double defm[5][4] = {
         { 6.0,   0.2,  0.5,   0.2}, // A
         { 0.2,  8.0,  0.2,   0.5}, // C
         { 0.5,  0.2,  8.0,   0.2}, // G
         { 0.2,  0.5,  0.2,   4.0}, // T
         { 1.3,  1.2,  1.2,   1.4}   // uniform
	};


/* gives no improved performance in my hands over the 5-component model
  int num_comp = 8;
  static double defmq[8] = { 0.13, 0.08, 0.08, 0.13, 0.08, 0.08, 0.17, 0.25 } ;
  static double defm[8][4] = {
       { 4.0,   0.3,   0.5,   0.4}, // A
       { 0.3,   22.0,  0.3,   0.8}, // C
       { 1.0,   0.4,   28.0,  0.4}, // G
       { 0.5,   0.8,   0.3,   6.0}, // T
       { 1.8,   0.8,   6.0,   1.0}, // AG
       { 0.6,   6.0,   0.6,   2.4}, // CT
       { 0.03,  0.01,  0.02,  0.02}, // anything, but highly conserved
       { 2.0,   2.0,   2.0,   2.0}  // anything, not much conservation
       };
 */

  ESL_ALLOC(pri, sizeof(P7_PRIOR));
  pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;


  pri->tm = esl_mixdchlet_Create(1, 3);  // match transitions; single component; 3 params
  pri->ti = esl_mixdchlet_Create(1, 2);  // insert transitions; single component; 2 params
  pri->td = esl_mixdchlet_Create(1, 2);  // delete transitions; single component; 2 params
  pri->em = esl_mixdchlet_Create(num_comp, 4); // match emissions; X component; 4 params
  pri->ei = esl_mixdchlet_Create(1, 4); // insert emissions; single component; 4 params


  if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;

  /* Transition priors: roughly, learned from rmark benchmark - hand-beautified (trimming overspecified significant digits)
   */
  pri->tm->pq[0]       = 1.0;
  pri->tm->alpha[0][0] = 2.0; // TMM
  pri->tm->alpha[0][1] = 0.1; // TMI
  pri->tm->alpha[0][2] = 0.1; // TMD


  pri->ti->pq[0]       = 1.0;
  pri->ti->alpha[0][0] = 0.06; // TIM
  pri->ti->alpha[0][1] = 0.2; // TII

  pri->td->pq[0]       = 1.0;
  pri->td->alpha[0][0] = 0.1; // TDM
  pri->td->alpha[0][1] = 0.2; // TDD



  /* Match emission priors  */
  for (q = 0; q < num_comp; q++)
    {
      pri->em->pq[q] = defmq[q];
      esl_vec_DCopy(defm[q], 4, pri->em->alpha[q]);
    }


  /* Insert emission priors. Should that alphas be lower? higher?
   */
  pri->ei->pq[0] = 1.0;
  esl_vec_DSet(pri->ei->alpha[0], 4, 1.0);

  return pri;

 ERROR:
  if (pri != NULL) p7_prior_Destroy(pri);
  return NULL;
}
Esempio n. 6
0
/* Function:  esl_min_ConjugateGradientDescent()
 * Incept:    SRE, Wed Jun 22 08:49:42 2005 [St. Louis]
 *
 * Purpose:   n-dimensional minimization by conjugate gradient descent.
 *           
 *            An initial point is provided by <x>, a vector of <n>
 *            components. The caller also provides a function <*func()> that 
 *            compute the objective function f(x) when called as 
 *            <(*func)(x, n, prm)>, and a function <*dfunc()> that can
 *            compute the gradient <dx> at <x> when called as 
 *            <(*dfunc)(x, n, prm, dx)>, given an allocated vector <dx>
 *            to put the derivative in. Any additional data or fixed
 *            parameters that these functions require are passed by
 *            the void pointer <prm>.
 *            
 *            The first step of each iteration is to try to bracket
 *            the minimum along the current direction. The initial step
 *            size is controlled by <u[]>; the first step will not exceed 
 *            <u[i]> for any dimension <i>. (You can think of <u> as
 *            being the natural "units" to use along a graph axis, if
 *            you were plotting the objective function.)
 *
 *            The caller also provides an allocated workspace sufficient to
 *            hold four allocated n-vectors. (4 * sizeof(double) * n).
 *
 *            Iterations continue until the objective function has changed
 *            by less than a fraction <tol>. This should not be set to less than
 *            sqrt(<DBL_EPSILON>). 
 *
 *            Upon return, <x> is the minimum, and <ret_fx> is f(x),
 *            the function value at <x>.
 *            
 * Args:      x        - an initial guess n-vector; RETURN: x at the minimum
 *            u        - "units": maximum initial step size along gradient when bracketing.
 *            n        - dimensionality of all vectors
 *            *func()  - function for computing objective function f(x)
 *            *dfunc() - function for computing a gradient at x
 *            prm      - void ptr to any data/params func,dfunc need 
 *            tol      - convergence criterion applied to f(x)
 *            wrk      - allocated 4xn-vector for workspace
 *            ret_fx   - optRETURN: f(x) at the minimum
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslENOHALT> if it fails to converge in MAXITERATIONS.
 *            <eslERANGE> if the minimum is not finite, which may
 *            indicate a problem in the implementation or choice of <*func()>.
 *
 * Xref:      STL9/101.
 */
int
esl_min_ConjugateGradientDescent(double *x, double *u, int n, 
       				 double (*func)(double *, int, void *),
				 void (*dfunc)(double *, int, void *, double *),
				 void *prm, double tol, double *wrk, double *ret_fx)
{
  double oldfx;
  double coeff;
  int    i, i1;
  double *dx, *cg, *w1, *w2;
  double cvg;
  double fa,fb,fc;
  double ax,bx,cx;
  double fx;

  dx = wrk;
  cg = wrk + n;
  w1 = wrk + 2*n;
  w2 = wrk + 3*n;

  oldfx = (*func)(x, n, prm);	/* init the objective function */
  
  /* Bail out if the function is +/-inf: this can happen if the caller
   * has screwed something up, or has chosen a bad start point.
   */
  if (oldfx == eslINFINITY || oldfx == -eslINFINITY)
	  ESL_EXCEPTION(eslERANGE, "minimum not finite");


  if (dfunc != NULL) 
    {
      (*dfunc)(x, n, prm, dx);	/* find the current negative gradient, - df(x)/dxi  */
      esl_vec_DScale(dx, n, -1.0);
    } 
  else numeric_derivative(x, u, n, func, prm, 1e-4, dx); /* resort to brute force */

  esl_vec_DCopy(dx, n, cg);	/* and make that the first conjugate direction, cg  */



  /* (failsafe) convergence test: a zero direction can happen, 
   * and it either means we're stuck or we're finished (most likely stuck)
   */
  for (i1 = 0; i1 < n; i1++) 
    if (cg[i1] != 0.) break;
  if  (i1 == n) {
    if (ret_fx != NULL) *ret_fx = oldfx;
    return eslOK;
  }
  
  for (i = 0; i < MAXITERATIONS; i++)
  {

      /* Figure out the initial step size.
       */
       bx = fabs(u[0] / cg[0]);
       for (i1 = 1; i1 < n; i1++)
	 {
	   cx = fabs(u[i1] / cg[i1]);
	   if (cx < bx) bx = cx;
	 }
 
       /* Bracket the minimum.
	*/
       bracket(x, cg, n, bx, func, prm, w1,
	      &ax, &bx, &cx, 
	      &fa, &fb, &fc);
       
       /* Minimize along the line given by the conjugate gradient <cg> */
       brent(x, cg, n, func, prm, ax, cx, 1e-3, 1e-8, w2, NULL, &fx);
       esl_vec_DCopy(w2, n, x);

      /* Bail out if the function is now +/-inf: this can happen if the caller
       * has screwed something up.
       */
      if (fx == eslINFINITY || fx == -eslINFINITY)
    	  ESL_EXCEPTION(eslERANGE, "minimum not finite");


      /* Find the negative gradient at that point (temporarily in w1) */
      if (dfunc != NULL) 
	  {
	    (*dfunc)(x, n, prm, w1);
	    esl_vec_DScale(w1, n, -1.0);
	  }
      else numeric_derivative(x, u, n, func, prm, 1e-4, w1); /* resort to brute force */

      /* Calculate the Polak-Ribiere coefficient */
      for (coeff = 0., i1 = 0; i1 < n; i1++)
	      coeff += (w1[i1] - dx[i1]) * w1[i1];
      coeff /= esl_vec_DDot(dx, dx, n);
      
      /* Calculate the next conjugate gradient direction in w2 */
      esl_vec_DCopy(w1, n, w2);
      esl_vec_DAddScaled(w2, cg, coeff, n);

      /* Finishing set up for next iteration: */
      esl_vec_DCopy(w1, n, dx);
      esl_vec_DCopy(w2, n, cg);

      /* Now: x is the current point; 
       *      fx is the function value at that point;
       *      dx is the current gradient at x;
       *      cg is the current conjugate gradient direction. 
       */

      /* Main convergence test. 1e-9 factor is fudging the case where our
       * minimum is at exactly f()=0.
       */
      cvg = 2.0 * fabs((oldfx-fx)) / (1e-10 + fabs(oldfx) + fabs(fx));

//      fprintf(stderr, "(%d): Old f() = %.9f    New f() = %.9f    Convergence = %.9f\n", i, oldfx, fx, cvg);
//      fprintf(stdout, "(%d): Old f() = %.9f    New f() = %.9f    Convergence = %.9f\n", i, oldfx, fx, cvg);

#if eslDEBUGLEVEL >= 2
      printf("\nesl_min_ConjugateGradientDescent():\n");
      printf("new point:     ");
      for (i1 = 0; i1 < n; i1++)
	    printf("%g ", x[i1]);

      printf("\nnew gradient:    ");
      for (i1 = 0; i1 < n; i1++)
	    printf("%g ", dx[i1]);

      numeric_derivative(x, u, n, func, prm, 1e-4, w1);
      printf("\n(numeric grad):  ");
      for (i1 = 0; i1 < n; i1++)
	    printf("%g ", w1[i1]);

      printf("\nnew direction: ");
      for (i1 = 0; i1 < n; i1++)
	    printf("%g ", cg[i1]);

      printf("\nOld f() = %g    New f() = %g    Convergence = %g\n\n", oldfx, fx, cvg);
#endif

     if (cvg <= tol) break;

      /* Second (failsafe) convergence test: a zero direction can happen, 
       * and it either means we're stuck or we're finished (most likely stuck)
       */
      for (i1 = 0; i1 < n; i1++) 
	     if (cg[i1] != 0.) break;
      if  (i1 == n) break;

      oldfx = fx;
    }


	if (ret_fx != NULL) *ret_fx = fx;

    if (i == MAXITERATIONS)
	  ESL_FAIL(eslENOHALT, NULL, " ");
// 	  ESL_EXCEPTION(eslENOHALT, "Failed to converge in ConjugateGradientDescent()");



  return eslOK;
}
Esempio n. 7
0
/* brent():
 * SRE, Sun Jul 10 19:07:05 2005 [St. Louis]
 *
 * Purpose:   Quasi-one-dimensional minimization of a function <*func()>
 *            in <n>-dimensions, along vector <dir> starting from a
 *            point <ori>. Identifies a scalar $x$ that approximates
 *            the position of the minimum along this direction, in a
 *            given bracketing interval (<a,b>).  The minimum must
 *            have been bracketed by the caller in the <(a,b)>
 *            interval.  <a> is often 0, because we often start at the
 *            <ori>.
 *
 *            A quasi-1D scalar coordinate $x$ (such as <a> or <b>) is
 *            transformed to a point $\mathbf{p}$ in n-space as:
 *            $\mathbf{p} = \mathbf{\mbox{ori}} + x
 *            \mathbf{\mbox{dir}}$.
 *
 *            Any extra (fixed) data needed to calculate <func> can be
 *            passed through the void <prm> pointer.
 *
 *            <eps> and <t> define the relative convergence tolerance,
 *            $\mbox{tol} = \mbox{eps} |x| + t$. <eps> should not be
 *            less than the square root of the machine precision.  The
 *            <DBL_EPSILON> is 2.2e-16 on many machines with 64-bit
 *            doubles, so <eps> is on the order of 1e-8 or more. <t>
 *            is a yet smaller number, used to avoid nonconvergence in
 *            the pathological case $x=0$.
 *
 *            Upon convergence (which is guaranteed), returns <xvec>,
 *            the n-dimensional minimum. Optionally, will also return
 *            <ret_x>, the scalar <x> that resulted in that
 *            n-dimensional minimum, and <ret_fx>, the objective
 *            function <*func(x)> at the minimum.
 *
 *            This is an implementation of the R.P. Brent (1973)
 *            algorithm for one-dimensional minimization without
 *            derivatives (modified from Brent's ALGOL60 code). Uses a
 *            combination of bisection search and parabolic
 *            interpolation; should exhibit superlinear convergence in
 *            most functions.
 *
 *
 * Args:      ori     - n-vector at origin
 *            dir     - direction vector (gradient) we're following from ori
 *            n       - dimensionality of ori, dir, and xvec
 *            (*func) - ptr to caller's objective function
 *            prm     - ptr to any additional data (*func)() needs
 *            a,b     - minimum is bracketed on interval [a,b]
 *            eps     - tol = eps |x| + t; eps >= 2 * relative machine precision
 *            t       - additional factor for tol to avoid x=0 case.
 *            xvec    - RETURN: minimum, as an n-vector (caller allocated)
 *            ret_x   - optRETURN: scalar multiplier that gave xvec
 *            ret_fx  - optRETURN: f(x)
 *
 * Returns:   (void)
 *
 * Reference: See [Brent73], Chapter 5. My version is derived directly
 *            from Brent's description and his ALGOL60 code. I've
 *            preserved his variable names as much as possible, to
 *            make the routine follow his published description
 *            closely. The Brent algorithm is also discussed in
 *            Numerical Recipes [Press88].
 */
static void
brent(double *ori, double *dir, int n,
      double (*func)(double *, int, void *), void *prm,
      double a, double b, double eps, double t,
      double *xvec, double *ret_x, double *ret_fx)
{
  double w,x,v,u;               /* with [a,b]: Brent's six points     */
  double m;                     /* midpoint of current [a,b] interval */
  double tol;                   /* tolerance = eps|x| + t */
  double fu,fv,fw,fx;           /* function evaluations */
  double p,q;                   /* numerator, denominator of parabolic interpolation */
  double r;
  double d,e;                   /* last, next-to-last values of p/q  */
  double c = 1. - (1./eslCONST_GOLD); /* Brent's c; 0.381966; golden ratio */
  int    niter;			/* number of iterations */

  x=v=w= a + c*(b-a);           /* initial guess of x by golden section */
  esl_vec_DCopy(ori, n, xvec);  /* build xvec from ori, dir, x */
  esl_vec_DAddScaled(xvec, dir, x, n);
  fx=fv=fw = (*func)(xvec, n, prm);   /* initial function evaluation */

  e     = 0.;
  niter = 0;
  while (1) /* algorithm is guaranteed to converge. */
    {
      m   = 0.5 * (a+b);
      tol = eps*fabs(x) + t;
      if (fabs(x-m) <= 2*tol - 0.5*(b-a)) break; /* convergence test. */
      niter++;

      p = q = r = 0.;
      if (fabs(e) > tol)
        { /* Compute parabolic interpolation, u = x + p/q */
          r = (x-w)*(fx-fv);
          q = (x-v)*(fx-fw);
          p = (x-v)*q - (x-w)*r;
          q = 2*(q-r);
          if (q > 0) { p = -p; } else {q = -q;}
          r = e;
          e=d;                  /* e is now the next-to-last p/q  */
        }

      if (fabs(p) < fabs(0.5*q*r) || p < q*(a-x) || p < q*(b-x))
        { /* Seems well-behaved? Use parabolic interpolation to compute new point u */
          d = p/q;              /* d remembers last p/q */
          u = x+d;              /* trial point, for now... */

          if (2.0*(u-a) < tol || 2.0*(b-u) < tol) /* don't evaluate func too close to a,b */
            d = (x < m)? tol : -tol;
        }
      else /* Badly behaved? Use golden section search to compute u. */
        {
          e = (x<m)? b-x : a-x;  /* e = largest interval */
          d = c*e;
        }

      /* Evaluate f(), but not too close to x.  */
      if      (fabs(d) >= tol) u = x+d;
      else if (d > 0)          u = x+tol;
      else                     u = x-tol;
      esl_vec_DCopy(ori, n, xvec);  /* build xvec from ori, dir, u */
      esl_vec_DAddScaled(xvec, dir, u, n);
      fu = (*func)(xvec, n, prm);   /* f(u) */

      /* Bookkeeping.  */
     if (fu <= fx)
        {
          if (u < x) b = x; else a = x;
          v = w; fv = fw; w = x; fw = fx; x = u; fx = fu;
        }
      else
        {
          if (u < x) a = u; else b = u;
          if (fu <= fw || w == x)
            { v = w; fv = fw; w = u; fw = fu; }
          else if (fu <= fv || v==x || v ==w)
            { v = u; fv = fu; }
        }
    }

  /* Return.
   */
  esl_vec_DCopy(ori, n, xvec);  /* build final xvec from ori, dir, x */
  esl_vec_DAddScaled(xvec, dir, x, n);
  if (ret_x  != NULL) *ret_x  = x;
  if (ret_fx != NULL) *ret_fx = fx;
  ESL_DPRINTF2(("\nbrent(): %d iterations\n", niter));
  ESL_DPRINTF2(("xx=%10.8f fx=%10.1f\n", x, fx));
}
Esempio n. 8
0
/* bracket():
 * SRE, Wed Jul 27 11:43:32 2005 [St. Louis]
 *
 * Purpose:   Bracket a minimum. 
 *
 *            The minimization is quasi-one-dimensional, 
 *            starting from an initial <n>-dimension vector <ori>
 *            in the <n>-dimensional direction <d>.
 *            
 *            Caller passes a ptr to the objective function <*func()>,
 *            and a void pointer to any necessary conditional 
 *            parameters <prm>. The objective function will
 *            be evaluated at a point <x> by calling
 *            <(*func)(x, n, prm)>. The caller's function
 *            is responsible to casting <prm> to whatever it's
 *            supposed to be, which might be a ptr to a structure,
 *            for example; typically, for a parameter optimization
 *            problem, this holds the observed data.
 *            
 *            The routine works in scalar multipliers relative
 *            to origin <ori> and direction <d>; that is, a new <n>-dimensional
 *            point <b> is defined as <ori> + <bx><d>, for a scalar <bx>.
 *            
 *            The routine identifies a triplet <ax>, <bx>, <cx> such
 *            that $a < b < c$ and such that a minimum is known to
 *            exist in the $(a,b)$ interval because $f(b) < f(a),
 *            f(c)$. Also, the <a..b> and <b...c> intervals are in
 *            a golden ratio; the <b..c> interval is 1.618 times larger
 *            than <a..b>.
 *
 *            Since <d> is usually in the direction of the gradient,
 *            the points <ax>,<bx>,<cx> might be expected to be $\geq 0$;
 *            however, when <ori> is already close to the minimum, 
 *            it is often faster to bracket the minimum using
 *            a negative <ax>. The caller might then try to be "clever"
 *            and assume that the minimum is in the <bx..cx> interval
 *            when <ax> is negative, rather than the full <ax..cx>
 *            interval. That cleverness can fail, though, if <ori>
 *            is already in fact the minimum, because the line minimizer
 *            in brent() assumes a non-inclusive interval. Use
 *            <ax..cx> as the bracket.
 *            
 * Args:      ori       - n-dimensional starting vector
 *            d         - n-dimensional direction to minimize along
 *            n         - # of dimensions
 *            firststep - bx is initialized to this scalar multiplier
 *            *func()   - objective function to minimize
 *            prm       - void * to any constant data that *func() needs
 *            wrk       - workspace: 1 allocated n-dimensional vector
 *            ret_ax    - RETURN:  ax < bx < cx scalar bracketing triplet
 *            ret_bx    - RETURN:    ...ax may be negative
 *            ret_cx    - RETURN:    
 *            ret_fa    - RETURN:  function evaluated at a,b,c
 *            ret_fb    - RETURN:    ... f(b) < f(a),f(c)
 *            ret_fc    - RETURN:
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslENOHALT> if it fails to converge.
 *
 * Xref:      STL9/130.
 */
static int
bracket(double *ori, double *d, int n, double firststep,
	double (*func)(double *, int, void *), void *prm, 
	double *wrk, 
	double *ret_ax, double *ret_bx, double *ret_cx,
	double *ret_fa, double *ret_fb, double *ret_fc)
{
  double ax,bx,cx;		/* scalar multipliers */
  double fa,fb,fc;		/* f() evaluations at those points */
  double swapper;
  int    niter;
  
  /* Set and evaluate our first two points f(a) and f(b), which
   * are initially at 0.0 and <firststep>.
   */
  ax = 0.;  /* always start w/ ax at the origin, ax=0 */
  fa = (*func)(ori, n, prm);

  bx = firststep;
  esl_vec_DCopy(ori, n, wrk);
  esl_vec_DAddScaled(wrk, d, bx, n);
  fb = (*func)(wrk, n, prm);

  /* In principle, we usually know that the minimum m lies to the
   * right of a, m>=a, because d is likely to be a gradient.  You
   * might think we want 0 = a < b < c.  In practice, there's problems
   * with that. It's far easier to identify bad points (f(x) > f(a))
   * than to identify good points (f(x) < f(a)), because letting f(x)
   * blow up to infinity is fine as far as bracketing is concerned.
   * It can be almost as hard to identify a point b that f(b) < f(a)
   * as it is to find the minimum in the first place!
   * Counterintuitively, in cases where f(b)>f(a), it's better
   * to just swap the a,b labels and look for c on the wrong side
   * of a! This often works immediately, if f(a) was reasonably
   * close to the minimum and f(b) and f(c) are both terrible.
   */
  if (fb > fa)
    {
      swapper = ax; ax = bx; bx = swapper;
      swapper = fa; fa = fb; fb = swapper;
    }

  /* Make our first guess at c.
   * Remember, we don't know that b>a any more, and c might go negative.
   * We'll either have:      a..b...c with a=0;
   *                or:  c...b..a     with b=0.
   * In many cases, we'll immediately be done.
   */
  cx = bx + (bx-ax)*1.618;
  esl_vec_DCopy(ori, n, wrk);
  esl_vec_DAddScaled(wrk, d, cx, n);
  fc = (*func)(wrk, n, prm);
  
  /* We're not satisfied until fb < fa, fc; 
   * throughout the routine, we guarantee that fb < fa;
   * so we just check fc.
   */
  niter = 0;
  while (fc <= fb)
    {
      /* Slide over, discarding the a point; choose 
       * new c point even further away.
       */
      ax = bx; bx = cx;
      fa = fb; fb = fc;
      cx = bx+(bx-ax)*1.618;
      esl_vec_DCopy(ori, n, wrk);
      esl_vec_DAddScaled(wrk, d, cx, n);
      fc = (*func)(wrk, n, prm);

      /* This is a rare instance. We've reach the minimum
       * by trying to bracket it. Also check that not all
       * three points are the same.
       */
      if (ax != bx && bx != cx && fa == fb && fb == fc) break;

      niter++;
      if (niter > 100)
    	  ESL_EXCEPTION(eslENORESULT, "Failed to bracket a minimum.");
    }

  /* We're about to return. Assure the caller that the points
   * are in order a < b < c, not the other way.
   */
  if (ax > cx)
    {
      swapper = ax; ax = cx; cx = swapper;
      swapper = fa; fa = fc; fc = swapper;
    }

  /* Return.
   */
  ESL_DPRINTF2(("\nbracket(): %d iterations\n", niter));
  ESL_DPRINTF2(("bracket(): triplet is %g  %g  %g along current direction\n", 
		ax, bx, cx));
  ESL_DPRINTF2(("bracket(): f()'s there are: %g  %g  %g\n\n", 
		fa, fb, fc));

  *ret_ax = ax;  *ret_bx = bx;  *ret_cx = cx;
  *ret_fa = fa;  *ret_fb = fb;  *ret_fc = fc;
  return eslOK;
}
/* dump_infocontent_info
 *                   
 * Given an MSA with RF annotation, dump information content per column data to 
 * an open output file.
 */
static int dump_infocontent_info(FILE *fp, ESL_ALPHABET *abc, double **abc_ct, int use_weights, int nali, int64_t alen, int nseq, int *i_am_rf, char *msa_name, char *alifile, char *errbuf)
{
  int status;
  int apos, rfpos;
  double bg_ent;
  double *bg = NULL;
  double *abc_freq = NULL;
  double nnongap;

  ESL_ALLOC(bg, sizeof(double) * abc->K);
  esl_vec_DSet(bg, abc->K, 1./(abc->K));
  bg_ent = esl_vec_DEntropy(bg, abc->K);
  free(bg);

  ESL_ALLOC(abc_freq, sizeof(double) * abc->K);


  fprintf(fp, "# Information content per column (bits):\n");
  fprintf(fp, "# Alignment file: %s\n", alifile);
  fprintf(fp, "# Alignment idx:  %d\n", nali);
  if(msa_name != NULL) { fprintf(fp, "# Alignment name: %s\n", msa_name); }
  fprintf(fp, "# Number of sequences: %d\n", nseq);
  if(use_weights) { fprintf(fp, "# IMPORTANT: Counts are weighted based on sequence weights in alignment file.\n"); }
  else            { fprintf(fp, "# Sequence weights from alignment were ignored (if they existed).\n"); }
  fprintf(fp, "#\n");

  if(i_am_rf != NULL) { 
    fprintf(fp, "# %7s  %7s  %10s  %10s\n", "rfpos",    "alnpos",  "freqnongap", "info(bits)");
    fprintf(fp, "# %7s  %7s  %10s  %10s\n", "-------", "-------",  "----------", "----------");
  }  
  else { 
    fprintf(fp, "# %7s  %10s  %10s\n", "alnpos",  "freqnongap", "info(bits)");
    fprintf(fp, "# %7s  %10s  %10s\n", "-------", "----------", "----------");
  }

  rfpos = 0;
  for(apos = 0; apos < alen; apos++) {
    if(i_am_rf != NULL) { 
      if(i_am_rf[apos]) { 
	fprintf(fp, "  %7d", rfpos+1);
	rfpos++; 
      }
      else { 
	fprintf(fp, "  %7s", "-");
      }
    }
    nnongap = esl_vec_DSum(abc_ct[apos], abc->K);
    esl_vec_DCopy(abc_ct[apos], abc->K, abc_freq);
    esl_vec_DNorm(abc_freq, abc->K);
    fprintf(fp, "  %7d  %10.8f  %10.8f\n", apos+1, 
	    nnongap / (nnongap + abc_ct[apos][abc->K]),
	    (bg_ent - esl_vec_DEntropy(abc_freq, abc->K)));
  }
  fprintf(fp, "//\n");

  if(abc_freq != NULL) free(abc_freq);

  return eslOK;

 ERROR:
  ESL_FAIL(eslEINVAL, errbuf, "out of memory");
  return status; /* NEVERREACHED */
}