/**
 * Reduce the n points in p to k points, in place.
 *
 * The array is first ordered with sort_by_t(). If k == n nothing else is
 * done. Otherwise the curve is classified with classify_curve() and must be
 * CRV_ENC_MAXIMA or CRV_MONO_DEC (enforced by assert):
 *
 *  - CRV_MONO_DEC: the first k - 1 points are left where they are and the
 *    last point of the array is copied into slot k - 1.
 *  - CRV_ENC_MAXIMA: the point at point_max_index() and its two neighbors
 *    are moved to the front, the remaining points are ordered with
 *    sort_by_like(), and the first k points are re-ordered with sort_by_t().
 *
 * On return the retained points occupy p[0..k-1]; the contents of the
 * remaining slots are leftovers and should not be relied upon.
 *
 * @param p  Array of n points; reordered in place.
 * @param n  Number of points in p.
 * @param k  Number of points to keep; must be <= n.
 */
void subset_points(point_t p[], const size_t n, const size_t k)
{
    sort_by_t(p, n);

    if (k == n) {
        return;
    }
    assert(k < n);

    curve_type_t curvature = classify_curve(p, n);
    assert(curvature == CRV_ENC_MAXIMA || curvature == CRV_MONO_DEC);

    if (curvature == CRV_MONO_DEC) {
        /* Keep leftmost k - 1 points, which are already in place, and
         * the rightmost point (for matching the asymptote). With k == 0
         * there is nothing to keep, and p[k - 1] would index out of
         * bounds. */
        if (k > 0) {
            p[k - 1] = p[n - 1];
        }
        return;
    }

    /* Otherwise, the curvature is CRV_ENC_MAXIMA. Keep the maximum
     * and its neighbors and sort the rest by likelihood. */
    assert(n >= 3);
    size_t max_idx = point_max_index(p, n);

    if (max_idx > 1) {
        /* Rotate { max - 1, max, max + 1 } to the front. The triple is
         * parked in a small stack buffer so that no heap allocation (and
         * therefore no allocation-failure path) is needed; the points that
         * preceded it are shifted right by three slots. */
        const size_t n_before = max_idx - 1;
        point_t triple[3];

        memcpy(triple, p + max_idx - 1, sizeof(triple));
        memmove(p + 3, p, n_before * sizeof(point_t));
        memcpy(p, triple, sizeof(triple));
    }

    /* Only rank the remainder when some of it will actually be kept. */
    if (k > 3) {
        sort_by_like(p + 3, n - 3);
    }

    sort_by_t(p, k);
}
/**
 * Build a set of evaluated points, adding samples until classify_curve()
 * reports CRV_ENC_MAXIMA or max_pts points have been collected.
 *
 * The starting points are copied into a newly allocated array. While the
 * curve is monotonic a new branch length is proposed (twice the last t for
 * CRV_MONO_INC, one tenth of the first t otherwise), passed through
 * bound_point() together with min_t / max_t, evaluated with log_like->fn,
 * appended, and the array is re-ordered with sort_by_t().
 *
 * @param log_like      Log-likelihood callback (fn) and its argument (args).
 * @param starting_pts  Already-evaluated points; *num_pts entries, not
 *                      modified. NOTE(review): the proposals read the first
 *                      and last entries, so these are presumably expected to
 *                      be ordered by t already - confirm against callers.
 * @param num_pts       In: number of starting points (must be >= 3).
 *                      Out: number of points in the returned array. Left
 *                      untouched when NULL is returned.
 * @param max_pts       Upper limit on the number of points collected.
 * @param min_t         Lower bound forwarded to bound_point().
 * @param max_t         Upper bound forwarded to bound_point().
 * @return Heap-allocated array of *num_pts points owned by the caller
 *         (release with free()), or NULL if the curve classifies as
 *         CRV_ENC_MINIMA or CRV_UNKNOWN, or if memory cannot be allocated.
 */
point_t* select_points(log_like_function_t *log_like,
                       const point_t starting_pts[],
                       size_t *num_pts,
                       const size_t max_pts,
                       const double min_t,
                       const double max_t)
{
    size_t n = *num_pts;
    assert(n >= 3);

    /* Never allocate fewer slots than there are starting points: a caller
     * passing *num_pts > max_pts would otherwise overflow the buffer in the
     * copy below. In that case no samples are added. */
    const size_t capacity = (n > max_pts) ? n : max_pts;

    point_t *points = malloc(sizeof(point_t) * capacity);
    if (points == NULL) {
        return NULL;
    }

    /* Copy already-evaluated points */
    memcpy(points, starting_pts, sizeof(point_t) * n);

    /* Add additional samples until the evaluated branch lengths enclose a
     * maximum or the maximum number of points is reached. */
    for (; n < max_pts; ++n) {
        curve_type_t curvature = classify_curve(points, n);

        if (curvature == CRV_ENC_MAXIMA) {
            break;
        } else if (curvature == CRV_ENC_MINIMA ||
                   curvature == CRV_UNKNOWN) {
            free(points);
            return NULL;
        }

        double proposed_t = 0.0;
        if (curvature == CRV_MONO_INC) {
            proposed_t = points[n - 1].t * 2.0;
        } else {
            /* curvature == CRV_MONO_DEC */
            proposed_t = points[0].t / 10.0;
        }

        double next_t = bound_point(proposed_t, points, n, min_t, max_t);

        points[n].t = next_t;
        points[n].ll = log_like->fn(next_t, log_like->args);

        sort_by_t(points, n + 1);
    }

    *num_pts = n;

    if (n < max_pts) {
        /* Trim the unused tail. A failed shrink is harmless: the original
         * (larger) buffer is still valid, so hand that back instead of
         * leaking it and reporting a spurious failure. */
        point_t *trimmed = realloc(points, sizeof(point_t) * n);
        if (trimmed != NULL) {
            points = trimmed;
        }
    }

    return points;
}
/**
 * Reduce the n points in p to k points, in place.
 *
 * The points must be NON_MONOTONIC according to monotonicity() (enforced by
 * assert). If k == n nothing is done. Otherwise the point at
 * point_max_index() and its two neighbors are moved to the front, the
 * remaining points are ordered with sort_by_like(), and the first k points
 * are re-ordered with sort_by_t().
 *
 * NOTE(review): three points starting at max_idx - 1 are read, so the
 * maximum is presumably expected to be an interior point (0 < max_idx <
 * n - 1) - confirm that NON_MONOTONIC guarantees this.
 *
 * @param p  Array of n points; reordered in place.
 * @param n  Number of points in p; must be at least 3.
 * @param k  Number of points to keep; must be <= n.
 */
void subset_points(point_t p[], const size_t n, const size_t k)
{
    assert(monotonicity(p, n) == NON_MONOTONIC);

    if (k == n) {
        return;
    }
    assert(k <= n);

    /* The code below addresses p + 3 and n - 3; fewer than three points
     * would make n - 3 wrap around. */
    assert(n >= 3);

    size_t max_idx = point_max_index(p, n);

    /* max_idx == 1 means the triple is already at the front. max_idx == 0
     * must be skipped as well: max_idx - 1 would wrap to SIZE_MAX. */
    if (max_idx > 1) {
        /* Always keep the points before and after max_idx. The triple is
         * parked in a small stack buffer so that no heap allocation (and no
         * allocation-failure path) is needed; the points that preceded it
         * are shifted right by three slots. */
        const size_t n_before = max_idx - 1;
        point_t triple[3];

        memcpy(triple, p + max_idx - 1, sizeof(triple));
        memmove(p + 3, p, n_before * sizeof(point_t));
        memcpy(p, triple, sizeof(triple));
    }

    sort_by_like(p + 3, n - 3);
    sort_by_t(p, k);
}
/**
 * Estimate the maximum-likelihood branch length by iteratively fitting the
 * model to evaluated points.
 *
 * The log-likelihood is evaluated at the n_pts values in t. If those points
 * are monotonic, the search restarts from DEFAULT_START via select_points().
 * The model is then repeatedly re-fit: each iteration asks the model for its
 * maximum (lcfit_bsm_ml_t), evaluates the log-likelihood there, adds that
 * point, reduces back to n_pts points with subset_points() and fits again,
 * for at most MAX_ITERS iterations.
 *
 * @param log_like   Log-likelihood callback (fn) and its argument (args).
 * @param t          n_pts branch lengths to start from. Also handed to
 *                   blit_points_to_arrays() as an output buffer, so its
 *                   contents are presumably overwritten with the branch
 *                   lengths of the points used for fitting.
 * @param n_pts      Number of entries in t / number of points kept for
 *                   fitting.
 * @param tolerance  Absolute tolerance on |ml_t - t of the best point| for
 *                   convergence.
 * @param model      Model that is rescaled and fit in place.
 * @param success    Set to true on convergence (and on the two early-exit
 *                   cases described under the return value), false otherwise.
 * @return The estimated branch length. Special cases:
 *         - points[0].t with *success == true if the restarted points are
 *           MONO_DEC;
 *         - 1e-8 with *success == true if the model maximum is negative;
 *         - NAN with *success == false if the restarted points are MONO_INC,
 *           select_points() fails, memory cannot be allocated, the model
 *           maximum is NaN or leaves the bracket around the best point, or
 *           the points become monotonic;
 *         - the last model maximum with *success == false if MAX_ITERS is
 *           reached.
 */
double estimate_ml_t(log_like_function_t *log_like, double t[],
                     const size_t n_pts, const double tolerance,
                     bsm_t* model, bool* success)
{
    *success = false;
    size_t iter = 0;
    const point_t *max_pt;

    double *l = malloc(sizeof(double) * n_pts);
    /* We allocate an extra point for scratch */
    point_t *points = malloc(sizeof(point_t) * (n_pts + 1));
    if (l == NULL || points == NULL) {
        free(l);
        free(points);
        return NAN;
    }
    evaluate_ll(log_like, t, n_pts, points);

    /* First, classify points */
    monotonicity_t m = monotonicity(points, n_pts);

    /* If the evaluated points are monotonic (they do not bracket an
     * extremum), start over from the default starting points */
    if (m != NON_MONOTONIC) {
        free(points);

        size_t n = N_DEFAULT_START;
        point_t *start_pts = malloc(sizeof(point_t) * n);
        if (start_pts == NULL) {
            free(l);
            return NAN;
        }
        evaluate_ll(log_like, DEFAULT_START, n, start_pts);
        points = select_points(log_like, start_pts, &n, DEFAULT_MAX_POINTS);

        /* The starting points are no longer needed whether or not the
         * selection succeeded; release them before the failure check so
         * they are not leaked on that path. */
        free(start_pts);

        if (points == NULL) {
            free(l);
            *success = false;
            return NAN;
        }

        m = monotonicity(points, n);

        if (m == MONO_DEC) {
            double ml_t = points[0].t;
            *success = true;
            free(points);
            free(l);
            return ml_t;
        } else if (m == MONO_INC) {
            *success = false;
            free(points);
            free(l);
            return NAN;
        }

        assert(n >= n_pts);
        if (n > n_pts) {
            /* Subset to top n_pts */
            subset_points(points, n, n_pts);
        }

        /* Allocate an extra point for scratch. Keep the old pointer until
         * the resize is known to have worked so it can still be freed. */
        point_t *resized = realloc(points, sizeof(point_t) * (n_pts + 1));
        if (resized == NULL) {
            free(points);
            free(l);
            return NAN;
        }
        points = resized;
    }

    max_pt = max_point(points, n_pts);
    double ml_t = max_pt->t;

    /* Re-fit */
    lcfit_bsm_rescale(max_pt->t, max_pt->ll, model);
    blit_points_to_arrays(points, n_pts, t, l);
    lcfit_fit_bsm(n_pts, t, l, model);

    for (iter = 0; iter < MAX_ITERS; iter++) {
        ml_t = lcfit_bsm_ml_t(model);

        if (isnan(ml_t)) {
            *success = false;
            break;
        }

        /* convergence check */
        if (fabs(ml_t - max_pt->t) <= tolerance) {
            *success = true;
            break;
        }

        /* Check for nonsensical ml_t - if the value is outside the bracketed
         * window, give up. */
        size_t max_idx = max_pt - points;
        if (ml_t < points[max_idx - 1].t || ml_t > points[max_idx + 1].t) {
            *success = false;
            ml_t = NAN;
            break;
        }

        /* Add ml_t estimate */
        if (ml_t < 0) {
            *success = true;
            ml_t = 1e-8;
            break;
        }

        /* The new sample goes into the scratch slot past the n_pts kept
         * points. */
        points[n_pts].t = ml_t;
        points[n_pts].ll = log_like->fn(ml_t, log_like->args);
        /* Call counter defined outside this function. */
        ml_likelihood_calls++;

        /* Retain top n_pts by log-likelihood */
        sort_by_t(points, n_pts + 1);
        if (monotonicity(points, n_pts + 1) != NON_MONOTONIC) {
            *success = false;
            ml_t = NAN;
            break;
        }
        subset_points(points, n_pts + 1, n_pts);

        blit_points_to_arrays(points, n_pts, t, l);
        lcfit_fit_bsm(n_pts, t, l, model);
        max_pt = max_point(points, n_pts);
    }

    /* Running out of iterations counts as failure. */
    if (iter == MAX_ITERS) {
        *success = false;
    }

    free(l);
    free(points);

    return ml_t;
}
/**
 * Estimate the maximum-likelihood branch length by iteratively fitting the
 * model to a growing set of evaluated points.
 *
 * The log-likelihood is evaluated at the n_pts values in t and the set is
 * extended by select_points(). The resulting curve must classify as
 * CRV_ENC_MAXIMA or CRV_MONO_DEC; if more points than requested were
 * collected they are reduced with subset_points(). Each iteration then
 * performs a weighted fit, asks the model for its maximum (lcfit_bsm_ml_t),
 * bounds that value with bound_point(), evaluates the log-likelihood there
 * and appends the new point, for at most MAX_ITERS iterations.
 *
 * Diagnostics are written to stderr; additional tracing is compiled in when
 * LCFIT_AUTO_VERBOSE is defined.
 *
 * @param log_like   Log-likelihood callback (fn) and its argument (args).
 * @param t          n_pts branch lengths to start from; not modified.
 * @param n_pts      Number of entries in t.
 * @param tolerance  Threshold compared against rel_err() for convergence.
 * @param model      Model that is rescaled and fit in place.
 * @param success    Set to true when one of the two convergence tests
 *                   passes, false otherwise.
 * @param min_t      Lower bound on sampled and returned branch lengths.
 * @param max_t      Upper bound on sampled and returned branch lengths.
 * @return The last model maximum clamped to [min_t, max_t]. NAN (with
 *         *success == false) if select_points() fails, the initial points
 *         neither enclose a maximum nor decrease, memory cannot be
 *         allocated for the initial points or a fit, or lcfit_bsm_ml_t()
 *         returns NaN.
 */
double estimate_ml_t(log_like_function_t *log_like, const double* t,
                     size_t n_pts, const double tolerance, bsm_t* model,
                     bool* success, const double min_t, const double max_t)
{
    *success = false;

    point_t *starting_pts = malloc(sizeof(point_t) * n_pts);
    if (starting_pts == NULL) {
        fprintf(stderr, "ERROR: out of memory\n");
        return NAN;
    }
    evaluate_ll(log_like, t, n_pts, starting_pts);

    const size_t orig_n_pts = n_pts;
    point_t* points = select_points(log_like, starting_pts, &n_pts,
                                    DEFAULT_MAX_POINTS, min_t, max_t);
    free(starting_pts);

    if (points == NULL) {
        fprintf(stderr, "ERROR: select_points returned NULL\n");
        *success = false;
        return NAN;
    }

    curve_type_t curvature = classify_curve(points, n_pts);

    if (!(curvature == CRV_ENC_MAXIMA || curvature == CRV_MONO_DEC)) {
        fprintf(stderr, "ERROR: "
                "points don't enclose a maximum and aren't decreasing\n");
        free(points);
        *success = false;
        return NAN;
    }

    /* From here on, curvature is CRV_ENC_MAXIMA or CRV_MONO_DEC, and
     * thus ml_t is zero or positive (but not infinite). */

    assert(n_pts >= orig_n_pts);
    if (n_pts > orig_n_pts) {
        /* Subset to top orig_n_pts */
        subset_points(points, n_pts, orig_n_pts);
        n_pts = orig_n_pts;
    }

    assert(points[0].t >= min_t);
    assert(points[n_pts - 1].t <= max_t);

#ifdef LCFIT_AUTO_VERBOSE
    fprintf(stderr, "starting iterative fit\n");

    fprintf(stderr, "starting points: ");
    print_points(stderr, points, n_pts);
    fprintf(stderr, "\n");
#endif /* LCFIT_AUTO_VERBOSE */

    /* Allocate an extra point for scratch. The old pointer is kept until
     * the resize is known to have worked so it can still be freed. */
    point_t *resized = realloc(points, sizeof(point_t) * (n_pts + 1));
    if (resized == NULL) {
        fprintf(stderr, "ERROR: out of memory\n");
        free(points);
        *success = false;
        return NAN;
    }
    points = resized;

    size_t iter = 0;
    const point_t* max_pt = NULL;
    double ml_t = 0.0;
    double prev_t = 0.0;

    for (iter = 0; iter < MAX_ITERS; iter++) {
        max_pt = max_point(points, n_pts);

        /* Re-fit */
        lcfit_bsm_rescale(max_pt->t, max_pt->ll, model);

        double* tbuf = malloc(sizeof(double) * n_pts);
        double* lbuf = malloc(sizeof(double) * n_pts);
        double* wbuf = malloc(sizeof(double) * n_pts);
        if (tbuf == NULL || lbuf == NULL || wbuf == NULL) {
            fprintf(stderr, "ERROR: out of memory\n");
            free(tbuf);
            free(lbuf);
            free(wbuf);
            *success = false;
            ml_t = NAN;
            break;
        }

        blit_points_to_arrays(points, n_pts, tbuf, lbuf);

        /* alpha grows linearly with the iteration number, from 0 on the
         * first iteration (all weights equal 1) to 1 at iteration
         * MAX_ITERS - 1, so points far below the best log-likelihood are
         * progressively down-weighted. */
        double alpha = (double) iter / (MAX_ITERS - 1);

        for (size_t i = 0; i < n_pts; ++i) {
            wbuf[i] = exp(alpha * (lbuf[i] - max_pt->ll));
        }

#ifdef LCFIT_AUTO_VERBOSE
        fprintf(stderr, "weights: ");
        for (size_t i = 0; i < n_pts; ++i) {
            fprintf(stderr, "%g ", wbuf[i]);
        }
        fprintf(stderr, "\n");
#endif /* LCFIT_AUTO_VERBOSE */

        /* NOTE(review): the trailing 250 is presumably an iteration limit
         * for the fitter - confirm against lcfit_fit_bsm_weight(). */
        lcfit_fit_bsm_weight(n_pts, tbuf, lbuf, wbuf, model, 250);

        free(tbuf);
        free(lbuf);
        free(wbuf);

        ml_t = lcfit_bsm_ml_t(model);

        if (isnan(ml_t)) {
            fprintf(stderr, "ERROR: "
                    "lcfit_bsm_ml_t returned NaN"
                    ", model = { %.3f, %.3f, %.6f, %.6f }\n",
                    model->c, model->m, model->r, model->b);
            *success = false;
            break;
        }

        if (curvature == CRV_ENC_MAXIMA) {
            /* Stop if the modeled maximum likelihood branch length is
             * within tolerance of the empirical maximum. */
            if (rel_err(max_pt->t, ml_t) <= tolerance) {
                *success = true;
                break;
            }
        }

        double next_t = bound_point(ml_t, points, n_pts, min_t, max_t);

        /* Stop if the next sample point is within tolerance of the
         * previous sample point. */
        if (rel_err(prev_t, next_t) <= tolerance) {
            *success = true;
            break;
        }

        /* The new sample goes into the scratch slot past the current
         * points. */
        points[n_pts].t = next_t;
        points[n_pts].ll = log_like->fn(next_t, log_like->args);
        prev_t = next_t;

        sort_by_t(points, n_pts + 1);
        curvature = classify_curve(points, n_pts + 1);

        if (!(curvature == CRV_ENC_MAXIMA || curvature == CRV_MONO_DEC)) {
            fprintf(stderr, "ERROR: "
                    "after iteration points don't enclose a maximum "
                    "and aren't decreasing\n");
            *success = false;
            break;
        }

        /* Adopt the new point and make room for the next scratch slot. */
        ++n_pts;
        resized = realloc(points, sizeof(point_t) * (n_pts + 1));
        if (resized == NULL) {
            /* points is still valid and is released after the loop. */
            fprintf(stderr, "ERROR: out of memory\n");
            *success = false;
            break;
        }
        points = resized;

#ifdef LCFIT_AUTO_VERBOSE
        fprintf(stderr, "current points: ");
        print_points(stderr, points, n_pts);
        fprintf(stderr, "\n");
#endif /* LCFIT_AUTO_VERBOSE */
    }

    if (iter == MAX_ITERS) {
        fprintf(stderr, "WARNING: maximum number of iterations reached\n");
    }

    free(points);

#ifdef LCFIT_AUTO_VERBOSE
    fprintf(stderr, "ending iterative fit after %zu iteration(s)\n", iter);
#endif /* LCFIT_AUTO_VERBOSE */

    /* Clamp to the allowed range; a NaN ml_t fails both comparisons and is
     * returned unchanged. */
    if (ml_t < min_t) {
        ml_t = min_t;
    } else if (ml_t > max_t) {
        ml_t = max_t;
    }

    return ml_t;
}