Ejemplo n.º 1
0
/* Function: EVDCensoredFit()
 * Date:     SRE, Mon Nov 17 10:01:05 1997 [St. Louis]
 * 
 * Purpose:  Given a /left-censored/ list or histogram of EVD-distributed 
 *           samples, as well as the number of censored samples z and the
 *           censoring value c,              
 *           find maximum likelihood parameters lambda and
 *           mu. 
 *           
 * Algorithm: Uses approach described in [Lawless82]. Solves
 *           for lambda using Newton/Raphson iterations;
 *           then substitutes lambda into Lawless' equation 4.2.3
 *           to get mu. 
 *           
 *           Newton/Raphson algorithm developed from description in
 *           Numerical Recipes in C [Press88]. 
 *           
 * Args:     x          - list of EVD distributed samples or x-axis of histogram
 *           y          - NULL, or y-axis of histogram
 *           n          - number of observed samples,or number of histogram bins 
 *           z          - number of censored samples
 *           c          - censoring value (all x_i >= c)
 *           ret_mu     : RETURN: ML estimate of mu
 *           ret_lambda : RETURN: ML estimate of lambda
 *           
 * Return:   (void)
 */
int
EVDCensoredFit(float *x, int *y, int n, int z, float c, 
	       float *ret_mu, float *ret_lambda)
{
  float  lambda, mu;
  float  fx;			/* f(x)  */
  float  dfx;			/* f'(x) */
  double esum;                  /* \sum e^(-lambda xi) */ 
  double mult;
  double total;
  float  tol = 1e-5;
  int    i;

  /* 1. Find an initial guess at lambda: linear regression here?
   */
  lambda = 0.2;

  /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda
   */
  for (i = 0; i < 100; i++)
    {
      Lawless422(x, y, n, z, c, lambda, &fx, &dfx);
      if (fabs(fx) < tol) break;             /* success */
      lambda = lambda - fx / dfx;	     /* Newton/Raphson is simple */
      if (lambda <= 0.) lambda = 0.001;      /* but be a little careful  */
    }

 /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed.
   *      Resort to a bisection search. Worse convergence speed
   *      but guaranteed to converge (unlike Newton/Raphson).
   *      We assume (!?) that fx is a monotonically decreasing function of x;
   *      i.e. fx > 0 if we are left of the root, fx < 0 if we
   *      are right of the root.
   */ 
  if (i == 100)
    {
      float left, right, mid;
				/* First we need to bracket the root */
      SQD_DPRINTF2(("EVDCensoredFit(): Newton/Raphson failed; switched to bisection"));
      lambda = right = left = 0.2;
      Lawless422(x, y, n, z, c, lambda, &fx, &dfx);
      if (fx < 0.) 
	{			/* fix right; search left. */
	  do {
	    left -= 0.03;
	    if (left < 0.) { 
	      SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); 
	      return 0;
	    }
	    Lawless422(x, y, n, z, c, left, &fx, &dfx);
	  } while (fx < 0.);
	}
      else
	{			/* fix left; search right. */
	  do {
	    right += 0.1;
	    Lawless422(x, y, n, z, c, left, &fx, &dfx);
	    if (right > 100.) {
	      SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root"));
	      return 0;
	    }
	  } while (fx > 0.);
	}
			/* now we bisection search in left/right interval */
      for (i = 0; i < 100; i++)
	{
	  mid = (left + right) / 2.; 
	  Lawless422(x, y, n, z, c, left, &fx, &dfx);
	  if (fabs(fx) < tol) break;             /* success */
	  if (fx > 0.)	left = mid;
	  else          right = mid;
	}
      if (i == 100) {
	SQD_DPRINTF2(("EVDCensoredFit(): even the bisection search failed"));
	return 0;
      }
      lambda = mid;
    }

  /* 3. Substitute into Lawless 4.2.3 to find mu
   */
  esum =  total = 0.;
  for (i = 0; i < n; i++)
    {
      mult   = (y == NULL) ? 1. : (double) y[i];
      esum  += mult * exp(-1. * lambda * x[i]);
      total += mult;
    }
  esum += (double) z * exp(-1. * lambda * c);    /* term from censored data */
  mu = -1. * log(esum / total) / lambda;        

  *ret_lambda = lambda;
  *ret_mu     = mu;   
  return 1;
}
Ejemplo n.º 2
0
/* Function: main_loop_pvm()
 * Date:     SRE, Wed Aug 19 13:59:54 1998 [St. Louis]
 *
 * Purpose:  Given an HMM and parameters for synthesizing random
 *           sequences; return a histogram of scores.
 *           (PVM version)  
 *
 * Args:     hmm     - an HMM to calibrate.
 *           seed    - random number seed
 *           nsample - number of seqs to synthesize
 *           lumpsize- # of seqs per slave exchange; controls granularity
 *           lenmean - mean length of random sequence
 *           lensd   - std dev of random seq length
 *           fixedlen- if nonzero, override lenmean, always this len
 *           hist       - RETURN: the score histogram 
 *           ret_max    - RETURN: highest score seen in simulation
 *           extrawatch - RETURN: total CPU time spend in slaves.
 *           ret_nslaves- RETURN: number of PVM slaves run.
 *
 * Returns:  (void)
 *           hist is alloc'ed here, and must be free'd by caller.
 */
static void
main_loop_pvm(struct plan7_s *hmm, int seed, int nsample, int lumpsize,
	      float lenmean, float lensd, int fixedlen,
	      struct histogram_s **ret_hist, float *ret_max, 
	      Stopwatch_t *extrawatch, int *ret_nslaves)
{
  struct histogram_s *hist;
  int                 master_tid;
  int                *slave_tid;
  int                 nslaves;
  int                 nsent;	/* # of seqs we've asked for so far       */
  int                 ndone;	/* # of seqs we've got results for so far */
  int		      packet;	/* # of seqs to have a slave do           */
  float               max;
  int                 slaveidx;	/* id of a slave */
  float              *sc;        /* scores returned by a slave */
  Stopwatch_t         slavewatch;
  int                 i;
  
  StopwatchZero(extrawatch);
  hist = AllocHistogram(-200, 200, 100);
  max  = -FLT_MAX;

  /* Initialize PVM
   */
  if ((master_tid = pvm_mytid()) < 0)
    Die("pvmd not responding -- do you have PVM running?");
#if DEBUGLEVEL >= 1
  pvm_catchout(stderr);		/* catch output for debugging */
#endif
  PVMSpawnSlaves("hmmcalibrate-pvm", &slave_tid, &nslaves);

  /* Initialize the slaves
   */
  pvm_initsend(PvmDataDefault);
  pvm_pkfloat(&lenmean,       1, 1);
  pvm_pkfloat(&lensd,         1, 1);
  pvm_pkint(  &fixedlen,      1, 1);
  pvm_pkint(  &Alphabet_type, 1, 1);
  pvm_pkint(  &seed,          1, 1);
  if (! PVMPackHMM(hmm)) Die("Failed to pack the HMM");
  pvm_mcast(slave_tid, nslaves, HMMPVM_INIT);
  SQD_DPRINTF1(("Initialized %d slaves\n", nslaves));

  /* Confirm slaves' OK status.
   */
  PVMConfirmSlaves(slave_tid, nslaves);
  SQD_DPRINTF1(("Slaves confirm that they're ok...\n"));
 
  /* Load the slaves
   */
  nsent = ndone = 0;
  for (slaveidx = 0; slaveidx < nslaves; slaveidx++)
    {
      packet    = (nsample - nsent > lumpsize ? lumpsize : nsample - nsent);

      pvm_initsend(PvmDataDefault);
      pvm_pkint(&packet,    1, 1);
      pvm_pkint(&slaveidx,  1, 1);
      pvm_send(slave_tid[slaveidx], HMMPVM_WORK);
      nsent += packet;
    }
  SQD_DPRINTF1(("Loaded %d slaves\n", nslaves));

  /* Receive/send loop
   */
  sc = MallocOrDie(sizeof(float) * lumpsize);
  while (nsent < nsample)
    {
				/* integrity check of slaves */
      PVMCheckSlaves(slave_tid, nslaves);

				/* receive results */
      SQD_DPRINTF2(("Waiting for results...\n"));
      pvm_recv(-1, HMMPVM_RESULTS);
      pvm_upkint(&slaveidx,   1, 1);
      pvm_upkint(&packet,     1, 1);
      pvm_upkfloat(sc,   packet, 1);
      SQD_DPRINTF2(("Got results.\n"));
      ndone += packet;

				/* store results */
      for (i = 0; i < packet; i++) {
	AddToHistogram(hist, sc[i]);
	if (sc[i] > max) max = sc[i];
      }
				/* send new work */
      packet    = (nsample - nsent > lumpsize ? lumpsize : nsample - nsent);

      pvm_initsend(PvmDataDefault);
      pvm_pkint(&packet,    1, 1);
      pvm_pkint(&slaveidx,  1, 1);
      pvm_send(slave_tid[slaveidx], HMMPVM_WORK);
      SQD_DPRINTF2(("Told slave %d to do %d more seqs.\n", slaveidx, packet));
      nsent += packet;
    }
      
  /* Wait for the last output to come in.
   */
  while (ndone < nsample)
    {
				/* integrity check of slaves */
      PVMCheckSlaves(slave_tid, nslaves);

				/* receive results */
      SQD_DPRINTF1(("Waiting for final results...\n"));
      pvm_recv(-1, HMMPVM_RESULTS);
      pvm_upkint(&slaveidx, 1, 1);
      pvm_upkint(&packet,   1, 1);
      pvm_upkfloat(sc, packet, 1);
      SQD_DPRINTF2(("Got some final results.\n"));
      ndone += packet;
				/* store results */
      for (i = 0; i < packet; i++) {
	AddToHistogram(hist, sc[i]);
	if (sc[i] > max) max = sc[i];
      }
    }

  /* Shut down the slaves: send -1,-1,-1.
   */
  pvm_initsend(PvmDataDefault);
  packet = -1;
  pvm_pkint(&packet, 1, 1);
  pvm_pkint(&packet, 1, 1);
  pvm_pkint(&packet, 1, 1);
  pvm_mcast(slave_tid, nslaves, HMMPVM_WORK);

  /* Collect stopwatch results; quit the VM; return.
   */
  for (i = 0; i < nslaves; i++)
    {
      pvm_recv(-1, HMMPVM_RESULTS);
      pvm_upkint(&slaveidx, 1, 1);
      StopwatchPVMUnpack(&slavewatch);

      SQD_DPRINTF1(("Slave %d finished; says it used %.2f cpu, %.2f sys\n",
		    slaveidx, slavewatch.user, slavewatch.sys));

      StopwatchInclude(extrawatch, &slavewatch);
    }

  free(slave_tid);
  free(sc);
  pvm_exit();
  *ret_hist    = hist;
  *ret_max     = max;
  *ret_nslaves = nslaves;
  return;
}