Esempio n. 1
0
void
subset_points(point_t p[], const size_t n, const size_t k)
{
    sort_by_t(p, n);

    if (k == n) return;
    assert(k < n);

    curve_type_t curvature = classify_curve(p, n);
    assert(curvature == CRV_ENC_MAXIMA || curvature == CRV_MONO_DEC);

    if (curvature == CRV_MONO_DEC) {
        /* Keep leftmost k - 1 points, which are already in place, and
         * the rightmost point (for matching the asymptote). */
        p[k - 1] = p[n - 1];
        return;
    }

    /* Otherwise, the curvature is CRV_ENC_MAXIMA. Keep the maximum
     * and its neighbors and sort the rest by likelihood. */
    assert(n >= 3);

    size_t max_idx = point_max_index(p, n);

    if (max_idx > 1) {
        const size_t n_before = max_idx - 1;
        const size_t s = n_before * sizeof(point_t);
        point_t *buf = malloc(s);
        memcpy(buf, p, s);
        memmove(p, p + max_idx - 1, sizeof(point_t) * 3);
        memcpy(p + 3, buf, s);
        free(buf);
    }

    if (k > 3) {
        sort_by_like(p + 3, n - 3);
    }

    sort_by_t(p, k);
}
Esempio n. 2
0
point_t*
select_points(log_like_function_t *log_like, const point_t starting_pts[],
              size_t *num_pts, const size_t max_pts, const double min_t,
              const double max_t)
{
    size_t n = *num_pts;
    assert(n >= 3);

    point_t* points = malloc(sizeof(point_t) * max_pts);

    /* Copy already-evaluated points */
    memcpy(points, starting_pts, sizeof(point_t) * n);

    /* Add additional samples until the evaluated branch lengths enclose a
     * maximum or the maximum number of points is reached. */
    for (; n < max_pts; ++n) {
        curve_type_t curvature = classify_curve(points, n);

        if (curvature == CRV_ENC_MAXIMA) {
            break;
        } else if (curvature == CRV_ENC_MINIMA || curvature == CRV_UNKNOWN) {
            free(points);
            return NULL;
        }

        double proposed_t = 0.0;

        if (curvature == CRV_MONO_INC) {
            proposed_t = points[n - 1].t * 2.0;
        } else { /* curvature == CRV_MONO_DEC */
            proposed_t = points[0].t / 10.0;
        }

        double next_t = bound_point(proposed_t, points, n, min_t, max_t);

        points[n].t = next_t;
        points[n].ll = log_like->fn(next_t, log_like->args);

        sort_by_t(points, n + 1);
    }

    *num_pts = n;

    if (n < max_pts) {
        return realloc(points, sizeof(point_t) * n);
    }

    return points;
}
Esempio n. 3
0
void
subset_points(point_t p[], const size_t n, const size_t k)
{
    assert(monotonicity(p, n) == NON_MONOTONIC);
    if(k == n) return;
    assert(k <= n);
    size_t max_idx = point_max_index(p, n);

    if(max_idx != 1) {
        /* Always keep the points before and after max_idx */
        const size_t n_before = max_idx - 1;
        const size_t s = n_before * sizeof(point_t);
        point_t *buf = malloc(s);
        memcpy(buf, p, s);
        memmove(p, p + max_idx - 1, sizeof(point_t) * 3);
        memcpy(p + 3, buf, s);
        free(buf);
    }
    sort_by_like(p + 3, n - 3);
    sort_by_t(p, k);
}
Esempio n. 4
0
double
estimate_ml_t(log_like_function_t *log_like, double t[],
              const size_t n_pts, const double tolerance, bsm_t* model,
              bool* success)
{
    *success = false;
    size_t iter = 0;
    const point_t *max_pt;
    double *l = malloc(sizeof(double) * n_pts);

    /* We allocate an extra point for scratch */
    point_t *points = malloc(sizeof(point_t) * (n_pts + 1));
    evaluate_ll(log_like, t, n_pts, points);

    /* First, classify points */
    monotonicity_t m = monotonicity(points, n_pts);

    /* If the function is no longer monotonic, start over */
    if(m != NON_MONOTONIC) {
        free(points);

        size_t n = N_DEFAULT_START;
        point_t *start_pts = malloc(sizeof(point_t) * n);
        evaluate_ll(log_like, DEFAULT_START, n, start_pts);
        points = select_points(log_like, start_pts, &n, DEFAULT_MAX_POINTS);
        if(points == NULL) {
            free(l);
            *success = false;
            return NAN;
        }
        free(start_pts);
        m = monotonicity(points, n);
        if(m == MONO_DEC) {
          double ml_t = points[0].t;
          *success = true;
          free(points);
          free(l);
          return ml_t;
        } else if (m == MONO_INC) {
          *success = false;
          free(points);
          free(l);
          return NAN;
        }
        assert(n >= n_pts);
        if(n > n_pts) {
            /* Subset to top n_pts */
            subset_points(points, n, n_pts);
        }

        /* Allocate an extra point for scratch */
        points = realloc(points, sizeof(point_t) * (n_pts + 1));
    }

    max_pt = max_point(points, n_pts);
    double ml_t = max_pt->t;

    /* Re-fit */
    lcfit_bsm_rescale(max_pt->t, max_pt->ll, model);
    blit_points_to_arrays(points, n_pts, t, l);
    lcfit_fit_bsm(n_pts, t, l, model);

    for(iter = 0; iter < MAX_ITERS; iter++) {
        ml_t = lcfit_bsm_ml_t(model);

        if(isnan(ml_t)) {
            *success = false;
            break;
        }

        /* convergence check */
        if(fabs(ml_t - max_pt->t) <= tolerance) {
            *success = true;
            break;
        }

        /* Check for nonsensical ml_t - if the value is outside the bracketed
         * window, give up. */
        size_t max_idx = max_pt - points;
        if(ml_t < points[max_idx - 1].t || ml_t > points[max_idx + 1].t) {
            *success = false;
            ml_t = NAN;
            break;
        }

        /* Add ml_t estimate */
        if(ml_t < 0) {
            *success = true;
            ml_t = 1e-8;
            break;
        }

        points[n_pts].t = ml_t;
        points[n_pts].ll = log_like->fn(ml_t, log_like->args);
        ml_likelihood_calls++;

        /* Retain top n_pts by log-likelihood */
        sort_by_t(points, n_pts + 1);

        if(monotonicity(points, n_pts + 1) != NON_MONOTONIC) {
            *success = false;
            ml_t = NAN;
            break;
        }
        subset_points(points, n_pts + 1, n_pts);

        blit_points_to_arrays(points, n_pts, t, l);
        lcfit_fit_bsm(n_pts, t, l, model);
        max_pt = max_point(points, n_pts);
    }

    if(iter == MAX_ITERS)
      *success = false;

    free(l);
    free(points);

    return ml_t;
}
Esempio n. 5
0
double
estimate_ml_t(log_like_function_t *log_like, const double* t,
              size_t n_pts, const double tolerance, bsm_t* model,
              bool* success, const double min_t, const double max_t)
{
    *success = false;

    point_t *starting_pts = malloc(sizeof(point_t) * n_pts);
    evaluate_ll(log_like, t, n_pts, starting_pts);

    const size_t orig_n_pts = n_pts;
    point_t* points = select_points(log_like, starting_pts, &n_pts,
                                    DEFAULT_MAX_POINTS, min_t, max_t);
    free(starting_pts);

    if (points == NULL) {
        fprintf(stderr, "ERROR: select_points returned NULL\n");
        *success = false;
        return NAN;
    }

    curve_type_t curvature = classify_curve(points, n_pts);

    if (!(curvature == CRV_ENC_MAXIMA || curvature == CRV_MONO_DEC)) {
        fprintf(stderr, "ERROR: "
                "points don't enclose a maximum and aren't decreasing\n");

        free(points);
        *success = false;
        return NAN;
    }

    /* From here on, curvature is CRV_ENC_MAXIMA or CRV_MONO_DEC, and
     * thus ml_t is zero or positive (but not infinite). */

    assert(n_pts >= orig_n_pts);
    if (n_pts > orig_n_pts) {
        /* Subset to top orig_n_pts */
        subset_points(points, n_pts, orig_n_pts);
        n_pts = orig_n_pts;
    }

    assert(points[0].t >= min_t);
    assert(points[n_pts - 1].t <= max_t);

#ifdef LCFIT_AUTO_VERBOSE
    fprintf(stderr, "starting iterative fit\n");

    fprintf(stderr, "starting points: ");
    print_points(stderr, points, n_pts);
    fprintf(stderr, "\n");
#endif /* LCFIT_AUTO_VERBOSE */

    /* Allocate an extra point for scratch */
    points = realloc(points, sizeof(point_t) * (n_pts + 1));

    size_t iter = 0;
    const point_t* max_pt = NULL;
    double ml_t = 0.0;
    double prev_t = 0.0;

    for (iter = 0; iter < MAX_ITERS; iter++) {
        max_pt = max_point(points, n_pts);

        /* Re-fit */
        lcfit_bsm_rescale(max_pt->t, max_pt->ll, model);

        double* tbuf = malloc(sizeof(double) * n_pts);
        double* lbuf = malloc(sizeof(double) * n_pts);

        blit_points_to_arrays(points, n_pts, tbuf, lbuf);

        double* wbuf = malloc(sizeof(double) * n_pts);
        double alpha = (double) iter / (MAX_ITERS - 1);

        for (size_t i = 0; i < n_pts; ++i) {
            wbuf[i] = exp(alpha * (lbuf[i] - max_pt->ll));
        }

#ifdef LCFIT_AUTO_VERBOSE
        fprintf(stderr, "weights: ");
        for (size_t i = 0; i < n_pts; ++i) {
            fprintf(stderr, "%g ", wbuf[i]);
        }
        fprintf(stderr, "\n");
#endif /* LCFIT_AUTO_VERBOSE */

        lcfit_fit_bsm_weight(n_pts, tbuf, lbuf, wbuf, model, 250);

        free(tbuf);
        free(lbuf);
        free(wbuf);

        ml_t = lcfit_bsm_ml_t(model);

        if (isnan(ml_t)) {
            fprintf(stderr, "ERROR: "
                    "lcfit_bsm_ml_t returned NaN"
                    ", model = { %.3f, %.3f, %.6f, %.6f }\n",
                    model->c, model->m, model->r, model->b);
            *success = false;
            break;
        }

        if (curvature == CRV_ENC_MAXIMA) {
            /* Stop if the modeled maximum likelihood branch length is
             * within tolerance of the empirical maximum. */
            if (rel_err(max_pt->t, ml_t) <= tolerance) {
                *success = true;
                break;
            }
        }

        double next_t = bound_point(ml_t, points, n_pts, min_t, max_t);

        /* Stop if the next sample point is within tolerance of the
         * previous sample point. */
        if (rel_err(prev_t, next_t) <= tolerance) {
            *success = true;
            break;
        }

        points[n_pts].t = next_t;
        points[n_pts].ll = log_like->fn(next_t, log_like->args);

        prev_t = next_t;

        sort_by_t(points, n_pts + 1);
        curvature = classify_curve(points, n_pts + 1);

        if (!(curvature == CRV_ENC_MAXIMA || curvature == CRV_MONO_DEC)) {
            fprintf(stderr, "ERROR: "
                    "after iteration points don't enclose a maximum "
                    "and aren't decreasing\n");
            *success = false;
            break;
        }

        ++n_pts;
        points = realloc(points, sizeof(point_t) * (n_pts + 1));

#ifdef LCFIT_AUTO_VERBOSE
        fprintf(stderr, "current points: ");
        print_points(stderr, points, n_pts);
        fprintf(stderr, "\n");
#endif /* LCFIT_AUTO_VERBOSE */
    }

    if (iter == MAX_ITERS) {
        fprintf(stderr, "WARNING: maximum number of iterations reached\n");
    }

    free(points);

#ifdef LCFIT_AUTO_VERBOSE
    fprintf(stderr, "ending iterative fit after %zu iteration(s)\n", iter);
#endif /* LCFIT_AUTO_VERBOSE */

    if (ml_t < min_t) {
        ml_t = min_t;
    } else if (ml_t > max_t) {
        ml_t = max_t;
    }

    return ml_t;
}