/* Function: EVDCensoredFit() * Date: SRE, Mon Nov 17 10:01:05 1997 [St. Louis] * * Purpose: Given a /left-censored/ list or histogram of EVD-distributed * samples, as well as the number of censored samples z and the * censoring value c, * find maximum likelihood parameters lambda and * mu. * * Algorithm: Uses approach described in [Lawless82]. Solves * for lambda using Newton/Raphson iterations; * then substitutes lambda into Lawless' equation 4.2.3 * to get mu. * * Newton/Raphson algorithm developed from description in * Numerical Recipes in C [Press88]. * * Args: x - list of EVD distributed samples or x-axis of histogram * y - NULL, or y-axis of histogram * n - number of observed samples,or number of histogram bins * z - number of censored samples * c - censoring value (all x_i >= c) * ret_mu : RETURN: ML estimate of mu * ret_lambda : RETURN: ML estimate of lambda * * Return: (void) */ int EVDCensoredFit(float *x, int *y, int n, int z, float c, float *ret_mu, float *ret_lambda) { float lambda, mu; float fx; /* f(x) */ float dfx; /* f'(x) */ double esum; /* \sum e^(-lambda xi) */ double mult; double total; float tol = 1e-5; int i; /* 1. Find an initial guess at lambda: linear regression here? */ lambda = 0.2; /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda */ for (i = 0; i < 100; i++) { Lawless422(x, y, n, z, c, lambda, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ lambda = lambda - fx / dfx; /* Newton/Raphson is simple */ if (lambda <= 0.) lambda = 0.001; /* but be a little careful */ } /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed. * Resort to a bisection search. Worse convergence speed * but guaranteed to converge (unlike Newton/Raphson). * We assume (!?) that fx is a monotonically decreasing function of x; * i.e. fx > 0 if we are left of the root, fx < 0 if we * are right of the root. */ if (i == 100) { float left, right, mid; /* First we need to bracket the root */ SQD_DPRINTF2(("EVDCensoredFit(): Newton/Raphson failed; switched to bisection")); lambda = right = left = 0.2; Lawless422(x, y, n, z, c, lambda, &fx, &dfx); if (fx < 0.) { /* fix right; search left. */ do { left -= 0.03; if (left < 0.) { SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); return 0; } Lawless422(x, y, n, z, c, left, &fx, &dfx); } while (fx < 0.); } else { /* fix left; search right. */ do { right += 0.1; Lawless422(x, y, n, z, c, left, &fx, &dfx); if (right > 100.) { SQD_DPRINTF2(("EVDCensoredFit(): failed to bracket root")); return 0; } } while (fx > 0.); } /* now we bisection search in left/right interval */ for (i = 0; i < 100; i++) { mid = (left + right) / 2.; Lawless422(x, y, n, z, c, left, &fx, &dfx); if (fabs(fx) < tol) break; /* success */ if (fx > 0.) left = mid; else right = mid; } if (i == 100) { SQD_DPRINTF2(("EVDCensoredFit(): even the bisection search failed")); return 0; } lambda = mid; } /* 3. Substitute into Lawless 4.2.3 to find mu */ esum = total = 0.; for (i = 0; i < n; i++) { mult = (y == NULL) ? 1. : (double) y[i]; esum += mult * exp(-1. * lambda * x[i]); total += mult; } esum += (double) z * exp(-1. * lambda * c); /* term from censored data */ mu = -1. * log(esum / total) / lambda; *ret_lambda = lambda; *ret_mu = mu; return 1; }
/* Function: main_loop_pvm() * Date: SRE, Wed Aug 19 13:59:54 1998 [St. Louis] * * Purpose: Given an HMM and parameters for synthesizing random * sequences; return a histogram of scores. * (PVM version) * * Args: hmm - an HMM to calibrate. * seed - random number seed * nsample - number of seqs to synthesize * lumpsize- # of seqs per slave exchange; controls granularity * lenmean - mean length of random sequence * lensd - std dev of random seq length * fixedlen- if nonzero, override lenmean, always this len * hist - RETURN: the score histogram * ret_max - RETURN: highest score seen in simulation * extrawatch - RETURN: total CPU time spend in slaves. * ret_nslaves- RETURN: number of PVM slaves run. * * Returns: (void) * hist is alloc'ed here, and must be free'd by caller. */ static void main_loop_pvm(struct plan7_s *hmm, int seed, int nsample, int lumpsize, float lenmean, float lensd, int fixedlen, struct histogram_s **ret_hist, float *ret_max, Stopwatch_t *extrawatch, int *ret_nslaves) { struct histogram_s *hist; int master_tid; int *slave_tid; int nslaves; int nsent; /* # of seqs we've asked for so far */ int ndone; /* # of seqs we've got results for so far */ int packet; /* # of seqs to have a slave do */ float max; int slaveidx; /* id of a slave */ float *sc; /* scores returned by a slave */ Stopwatch_t slavewatch; int i; StopwatchZero(extrawatch); hist = AllocHistogram(-200, 200, 100); max = -FLT_MAX; /* Initialize PVM */ if ((master_tid = pvm_mytid()) < 0) Die("pvmd not responding -- do you have PVM running?"); #if DEBUGLEVEL >= 1 pvm_catchout(stderr); /* catch output for debugging */ #endif PVMSpawnSlaves("hmmcalibrate-pvm", &slave_tid, &nslaves); /* Initialize the slaves */ pvm_initsend(PvmDataDefault); pvm_pkfloat(&lenmean, 1, 1); pvm_pkfloat(&lensd, 1, 1); pvm_pkint( &fixedlen, 1, 1); pvm_pkint( &Alphabet_type, 1, 1); pvm_pkint( &seed, 1, 1); if (! PVMPackHMM(hmm)) Die("Failed to pack the HMM"); pvm_mcast(slave_tid, nslaves, HMMPVM_INIT); SQD_DPRINTF1(("Initialized %d slaves\n", nslaves)); /* Confirm slaves' OK status. */ PVMConfirmSlaves(slave_tid, nslaves); SQD_DPRINTF1(("Slaves confirm that they're ok...\n")); /* Load the slaves */ nsent = ndone = 0; for (slaveidx = 0; slaveidx < nslaves; slaveidx++) { packet = (nsample - nsent > lumpsize ? lumpsize : nsample - nsent); pvm_initsend(PvmDataDefault); pvm_pkint(&packet, 1, 1); pvm_pkint(&slaveidx, 1, 1); pvm_send(slave_tid[slaveidx], HMMPVM_WORK); nsent += packet; } SQD_DPRINTF1(("Loaded %d slaves\n", nslaves)); /* Receive/send loop */ sc = MallocOrDie(sizeof(float) * lumpsize); while (nsent < nsample) { /* integrity check of slaves */ PVMCheckSlaves(slave_tid, nslaves); /* receive results */ SQD_DPRINTF2(("Waiting for results...\n")); pvm_recv(-1, HMMPVM_RESULTS); pvm_upkint(&slaveidx, 1, 1); pvm_upkint(&packet, 1, 1); pvm_upkfloat(sc, packet, 1); SQD_DPRINTF2(("Got results.\n")); ndone += packet; /* store results */ for (i = 0; i < packet; i++) { AddToHistogram(hist, sc[i]); if (sc[i] > max) max = sc[i]; } /* send new work */ packet = (nsample - nsent > lumpsize ? lumpsize : nsample - nsent); pvm_initsend(PvmDataDefault); pvm_pkint(&packet, 1, 1); pvm_pkint(&slaveidx, 1, 1); pvm_send(slave_tid[slaveidx], HMMPVM_WORK); SQD_DPRINTF2(("Told slave %d to do %d more seqs.\n", slaveidx, packet)); nsent += packet; } /* Wait for the last output to come in. */ while (ndone < nsample) { /* integrity check of slaves */ PVMCheckSlaves(slave_tid, nslaves); /* receive results */ SQD_DPRINTF1(("Waiting for final results...\n")); pvm_recv(-1, HMMPVM_RESULTS); pvm_upkint(&slaveidx, 1, 1); pvm_upkint(&packet, 1, 1); pvm_upkfloat(sc, packet, 1); SQD_DPRINTF2(("Got some final results.\n")); ndone += packet; /* store results */ for (i = 0; i < packet; i++) { AddToHistogram(hist, sc[i]); if (sc[i] > max) max = sc[i]; } } /* Shut down the slaves: send -1,-1,-1. */ pvm_initsend(PvmDataDefault); packet = -1; pvm_pkint(&packet, 1, 1); pvm_pkint(&packet, 1, 1); pvm_pkint(&packet, 1, 1); pvm_mcast(slave_tid, nslaves, HMMPVM_WORK); /* Collect stopwatch results; quit the VM; return. */ for (i = 0; i < nslaves; i++) { pvm_recv(-1, HMMPVM_RESULTS); pvm_upkint(&slaveidx, 1, 1); StopwatchPVMUnpack(&slavewatch); SQD_DPRINTF1(("Slave %d finished; says it used %.2f cpu, %.2f sys\n", slaveidx, slavewatch.user, slavewatch.sys)); StopwatchInclude(extrawatch, &slavewatch); } free(slave_tid); free(sc); pvm_exit(); *ret_hist = hist; *ret_max = max; *ret_nslaves = nslaves; return; }