// Bins NSAMPLES draws of rng.next() into PSI_DF bins and asserts that
// psi_test() accepts the observed counts against equal per-bin probabilities.
//
// NOTE(review): the bin index is floor(rng.next() * PSI_DF), so this assumes
// rng.next() returns a value in [0, 1); anything else would index outside
// `counts` -- confirm against RandomNumberGenerator.
static void test_uniform01(RandomNumberGenerator &rng) {
    // Expected distribution: each of the PSI_DF bins has probability 1/PSI_DF.
    vector<double> probabilities(PSI_DF, 1/static_cast<double>(PSI_DF));
    // Observed number of draws per bin, starting at zero.
    vector<size_t> counts(PSI_DF);

    for (size_t draw = 0; draw < NSAMPLES; ++draw) {
        const size_t bin = static_cast<size_t>(floor(rng.next()*PSI_DF));
        counts[bin]++;
    }

    assert(psi_test(counts, probabilities, NSAMPLES));
}
// Example #2
// 0
// Builds a vector of counts by performing num_datum sequential draws.
//
// Each iteration takes one value from rng.next(), passes it together with the
// counts accumulated so far, their running total and alpha to
// numerics::crp_draw_sample(), and records the returned draw through
// insert_into_counts().  Returns the resulting counts; the vector is empty
// when num_datum <= 0.
//
// NOTE(review): "crp" presumably stands for Chinese restaurant process and
// alpha for its concentration parameter -- confirm against numerics.
vector<int> draw_crp_init_counts(int num_datum, double alpha,
				      RandomNumberGenerator &rng) {
  vector<int> counts;
  int sum_counts = 0;  // number of draws already recorded in `counts`
  for(int remaining = num_datum; remaining > 0; --remaining) {
    double rand_u = rng.next();
    int draw = numerics::crp_draw_sample(counts, sum_counts, alpha, rand_u);
    insert_into_counts(draw, counts);
    sum_counts++;
  }
  return counts;
}
// Example #3
// 0
int main(int argc, char** argv) {
    cout << "Begin:: test_cluster" << endl;
    RandomNumberGenerator rng;

    // set some test sizing parameters
    int max_value = 20;
    int num_rows = 3;
    int num_cols = 3;

    // create the objects
    map<int, map<string, double> > hypers_m;
    for (int i = 0; i < num_cols; i++) {
        hypers_m[i] = create_default_hypers();
    }
    vector<map<string, double>*> hypers_v;
    map<int, map<string, double> >::iterator hm_it;
    for (hm_it = hypers_m.begin(); hm_it != hypers_m.end(); hm_it++) {
        int key = hm_it->first;
        map<string, double>& hypers = hm_it->second;
        hypers_v.push_back(&hypers);
        cout << "hypers_" << key << ": " << hypers << endl;
    }
    cout << "hypers_v: " << hypers_v << endl;

    Cluster cd(hypers_v);
    vector<ComponentModel*> p_cm_v;
    for (int col_idx = 0; col_idx < num_cols; col_idx++) {
        ContinuousComponentModel *p_cm = new ContinuousComponentModel(
            *hypers_v[col_idx]);
        p_cm_v.push_back(p_cm);
    }

    // print the empty cluster
    cout << endl << endl << "begin empty cluster print" << endl;
    cout << cd << endl;
    cout << "end empty cluster print" << endl << endl << endl;

    // generate random data;
    vector<vector<double> > rows;
    for (int row_idx = 0; row_idx < num_rows; row_idx++) {
        vector<double> row_data;
        for (int col_idx = 0; col_idx < num_cols; col_idx++) {
            double random_value = (rng.nexti(max_value) + 1) * rng.next();
            row_data.push_back(random_value);
        }
        rows.push_back(row_data);
    }

    // poplute the objects
    cout << "Populating objects" << endl;
    for (int row_idx = 0; row_idx < num_rows; row_idx++) {
        vector<double> row_data = rows[row_idx];
        for (int col_idx = 0; col_idx < num_cols; col_idx++) {
            double random_value = rows[row_idx][col_idx];
            p_cm_v[col_idx]->insert_element(random_value);
        }
        cd.insert_row(row_data, row_idx);
    }

    // test score equivalence
    vector<double> score_v;
    double sum_scores = 0;
    for (int col_idx = 0; col_idx < num_cols; col_idx++) {
        double suff_score = p_cm_v[col_idx]->calc_marginal_logp();
        score_v.push_back(suff_score);
        sum_scores += suff_score;
    }
    cout << "vector of separate suffstats scores after population: ";
    cout << score_v << endl;
    cout << "sum separate scores: " << sum_scores << endl;
    cout << "Cluster score with same data: " << cd.calc_sum_marginal_logps() <<
         endl;
    cout << endl;
    //
    assert(is_almost(sum_scores, cd.calc_sum_marginal_logps(), 1E-10));



    // test hypers
    for (int which_col = 0; which_col < num_cols; which_col++) {
        int N_grid = 11;
        double test_scale = 10;
        ContinuousComponentModel *p_ccm_i = dynamic_cast<ContinuousComponentModel*>
                                            (cd.p_model_v[which_col]);
        double r, nu, s, mu;
        double precision = 1E-10;
        p_ccm_i->get_hyper_doubles(r, nu, s, mu);
        double score_0 = p_ccm_i->calc_marginal_logp();
        vector<double> hyper_grid;
        vector<double> hyper_conditionals;
        double curr_hyper_conditional_in_grid;
        //
        //    test 'r' hyper
        cout << "testing r conditionals" << endl;
        hyper_grid = log_linspace(r / test_scale, r * test_scale, N_grid);
        hyper_conditionals = cd.calc_hyper_conditionals(which_col, "r", hyper_grid);
        cout << "r_grid from function: " << hyper_grid << endl;
        cout << "r_conditioanls from function: " << hyper_conditionals << endl;
        curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
        cout << "curr r conditional in grid: " << curr_hyper_conditional_in_grid <<
             endl;
        assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));

        // map<string, double> &hypers = cd.get_hypers_i(which_col);
        map<string, double>& hypers = *(*cd.p_model_v[which_col]).p_hypers;
        double prior_r = hypers["r"];
        double new_r = hyper_grid[0];
        //
        cout << endl << "testing incorporate hyper update" << endl;
        cout << "new r: " << new_r << endl;
        hypers["r"] = new_r;
        cd.incorporate_hyper_update(which_col);
        cout << "marginal logp with new r: " <<
             cd.p_model_v[which_col]->calc_marginal_logp() << endl;
        //
        cout << "changing r back to: " << prior_r << endl;
        hypers["r"] = prior_r;
        cd.incorporate_hyper_update(which_col);
        cout << "marginal logp with prior r: " <<
             cd.p_model_v[which_col]->calc_marginal_logp() << endl;
        cout << "done testing incorporate hyper update on col" << endl << endl;

        //
        //    test 'nu' hyper
        cout << "testing nu conditionals" << endl;
        hyper_grid = log_linspace(nu / test_scale, nu * test_scale, N_grid);
        hyper_conditionals = cd.calc_hyper_conditionals(which_col, "nu", hyper_grid);
        cout << "nu_grid: " << hyper_grid << endl;
        cout << "nu_conditionals: " << hyper_conditionals << endl;
        curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
        cout << "curr nu conditional in grid: " << curr_hyper_conditional_in_grid <<
             endl;
        assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));
        //
        //    test 's' hyper
        cout << "testing s conditionals" << endl;
        hyper_grid = log_linspace(s / test_scale, s * test_scale, N_grid);
        hyper_conditionals = cd.calc_hyper_conditionals(which_col, "s", hyper_grid);
        cout << "s_grid: " << hyper_grid << endl;
        cout << "s_conditionals: " << hyper_conditionals << endl;
        curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
        cout << "curr s conditional in grid: " << curr_hyper_conditional_in_grid <<
             endl;
        assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));
        //
        //    test 'mu' hyper
        cout << "testing mu conditionals" << endl;
        hyper_grid = log_linspace(mu / test_scale, mu * test_scale, N_grid);
        hyper_conditionals = cd.calc_hyper_conditionals(which_col, "mu", hyper_grid);
        cout << "mu_grid: " << hyper_grid << endl;
        cout << "mu_conditionals: " << hyper_conditionals << endl;
        curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
        cout << "curr  mu conditional in grid: " << curr_hyper_conditional_in_grid <<
             endl;
        assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));
    }







    // depopulate the objects
    cout << "De-populating objects" << endl;
    for (int row_idx = 0; row_idx < num_rows; row_idx++) {
        vector<double> row_data = rows[row_idx];
        for (int col_idx = 0; col_idx < num_cols; col_idx++) {
            double random_value = rows[row_idx][col_idx];
            p_cm_v[col_idx]->remove_element(random_value);
        }
        cd.remove_row(row_data, row_idx);
    }

    // test score equivalence
    score_v.clear();
    sum_scores = 0;
    for (int col_idx = 0; col_idx < num_cols; col_idx++) {
        double suff_score = p_cm_v[col_idx]->calc_marginal_logp();
        score_v.push_back(suff_score);
        sum_scores += suff_score;
    }
    cout << "vector of separate suffstats scores after depopulation: ";
    cout << score_v << endl;
    cout << "sum separate scores: " << sum_scores << endl;
    cout << "Cluster score with same data: " << cd.calc_sum_marginal_logps() <<
         endl;
    cout << endl;
    //
    assert(is_almost(sum_scores, cd.calc_sum_marginal_logps(), 1E-10));

    // test ability to remove columns
    //
    // poplute the cluster object
    cout << "Populating objects" << endl;
    for (int row_idx = 0; row_idx < num_rows; row_idx++) {
        vector<double> row_data = rows[row_idx];
        cd.insert_row(row_data, row_idx);
    }
    cout << "cluster after population" << endl;
    cout << cd << endl;
    //
    // depopulate columns one by one
    while (cd.get_num_cols() > 0) {
        int col_idx = cd.get_num_cols() - 1;
        cout << "removing column: " << col_idx << endl;
        cd.remove_col(col_idx);
        cout << "removed column: " << col_idx << endl;
        cout << "cluster now looks like: " << endl;
        cout << cd << endl;
    }

    while (p_cm_v.size() != 0) {
        ComponentModel *p_cm = p_cm_v.back();
        delete p_cm;
        p_cm_v.pop_back();
    }

    cout << "Stop:: test_cluster" << endl;
}
/**
 * Test driver for the continuous component model (CCM).
 *
 * Steps, in order:
 *   1. Draw random initial hypers (r0, nu0, s0, mu0) and random test values.
 *   2. Build a CCM from those hypers and assert that it starts with zero
 *      suffstats, the supplied hypers and a marginal logp of 0.
 *   3. Insert the values and remove them again, once in reversed and once in
 *      shuffled order, asserting each time that the initial state is
 *      recovered.
 *   4. With the values inserted, assert for each hyper (r, nu, s, mu) that
 *      the hyper conditional at the middle grid point matches the current
 *      marginal logp.
 *   5. Build a second CCM and compare its marginal and predictive logps with
 *      hard-coded expected values.
 *
 * All checks use assert(), so they are compiled out when NDEBUG is defined.
 * argc and argv are unused.  insert_elements() and remove_elements() are
 * helpers defined outside this block.
 */
int main(int argc, char** argv) {
    cout << endl << "Begin:: test_continuous_component_model" << endl;
    RandomNumberGenerator rng;

    // test settings
    int max_randi = 30;
    int num_values_to_test = 10;
    double precision = 1E-10;

    // generate all the random data to use
    //
    // initial parameters
    // NOTE(review): each line makes two draws inside one expression; the
    // order in which the operands of '*' are evaluated is unspecified, so
    // which draw happens first is up to the compiler.
    double r0 = rng.nexti(max_randi) * rng.next();
    double nu0 = rng.nexti(max_randi) * rng.next();
    double s0 = rng.nexti(max_randi) * rng.next();
    double mu0 = rng.nexti(max_randi) * rng.next();
    //
    // elements to add
    vector<double> values_to_test;
    for (int i = 0; i < num_values_to_test; i++) {
        double rand_value = rng.nexti(max_randi) * rng.next();
        values_to_test.push_back(rand_value);
    }
    // remove in a reversed order and a different order
    vector<double> values_to_test_reversed = values_to_test;
    std::reverse(values_to_test_reversed.begin(), values_to_test_reversed.end());
    vector<double> values_to_test_shuffled = values_to_test;
    // rng is passed as the random-number functor for the shuffle.
    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed
    // in C++17.
    random_shuffle(values_to_test_shuffled.begin(),
                   values_to_test_shuffled.end(),
                   rng);

    // print generated values
    //
    cout << endl << "initial parameters: " << "\t";
    cout << "r0: " << r0 << "\t";
    cout << "nu0: " << nu0 << "\t";
    cout << "s0: " << s0 << "\t";
    cout << "mu0: " << mu0 << endl;
    cout << "values_to_test: " << values_to_test << endl;
    cout << "values_to_test_shuffled: " << values_to_test_shuffled << endl;

    // FIXME: should compare with a fixed dataset with known
    // post-update hyper values and score

    // FIXME: should be manually calling numerics:: functions
    // to compare component models results with

    // create the component model object
    //
    //       r, nu, s, mu
    map<string, double> hypers;
    hypers["r"] = r0;
    hypers["nu"] = nu0;
    hypers["s"] = s0;
    hypers["mu"] = mu0;
    CCM ccm(hypers);
    cout << endl << "initial component model object" << endl;
    cout << ccm << endl;

    // verify initial parameters: no data yet, so the suffstats must be zero,
    // the hypers must be the ones supplied, and the marginal logp must be 0
    //
    int count;
    double sum_x, sum_x_sq;
    double r, nu, s, mu;
    ccm.get_suffstats(count, sum_x, sum_x_sq);
    ccm.get_hyper_doubles(r, nu, s, mu);
    assert(count == 0);
    assert(is_almost(sum_x, 0, precision));
    assert(is_almost(sum_x_sq, 0, precision));
    assert(is_almost(r, r0, precision));
    assert(is_almost(nu, nu0, precision));
    assert(is_almost(s, s0, precision));
    assert(is_almost(mu, mu0, precision));
    assert(is_almost(ccm.calc_marginal_logp(), 0, precision));

    // push data into component model
    insert_elements(ccm, values_to_test);
    cout << endl << "component model after insertion of data" << endl;
    cout << ccm << endl;
    // ensure count is proper
    assert(ccm.get_count() == num_values_to_test);
    // remove data from component model in REVERSED order
    remove_elements(ccm, values_to_test_reversed);
    cout << endl << "component model after removal of data in reversed order" <<
         endl;
    cout << ccm << endl;
    // ensure initial values are recovered
    ccm.get_suffstats(count, sum_x, sum_x_sq);
    ccm.get_hyper_doubles(r, nu, s, mu);
    assert(count == 0);
    assert(is_almost(sum_x, 0, precision));
    assert(is_almost(sum_x_sq, 0, precision));
    assert(is_almost(r, r0, precision));
    assert(is_almost(nu, nu0, precision));
    assert(is_almost(s, s0, precision));
    assert(is_almost(mu, mu0, precision));
    assert(is_almost(ccm.calc_marginal_logp(), 0, precision));

    // push data into component model
    insert_elements(ccm, values_to_test);
    cout << endl << "component model after insertion of data" << endl;
    cout << ccm << endl;
    // ensure count is proper
    assert(ccm.get_count() == num_values_to_test);
    // remove data from component model in SHUFFLED order
    remove_elements(ccm, values_to_test_shuffled);
    cout << endl << "component model after removal of data in shuffled order" <<
         endl;
    cout << ccm << endl;
    // ensure initial values are recovered
    ccm.get_suffstats(count, sum_x, sum_x_sq);
    ccm.get_hyper_doubles(r, nu, s, mu);
    assert(count == 0);
    assert(is_almost(sum_x, 0, precision));
    assert(is_almost(sum_x_sq, 0, precision));
    assert(is_almost(r, r0, precision));
    assert(is_almost(nu, nu0, precision));
    assert(is_almost(s, s0, precision));
    assert(is_almost(mu, mu0, precision));
    assert(is_almost(ccm.calc_marginal_logp(), 0, precision));

    // push data into component model
    insert_elements(ccm, values_to_test);
    cout << endl << "component model after insertion of data" << endl;
    cout << ccm << endl;
    // test hypers
    //
    // For each hyper a grid of N_grid points is built from value / test_scale
    // to value * test_scale, and the conditional at the middle index,
    // (N_grid - 1) / 2, is compared with score_0, the marginal logp under the
    // current hypers.
    // NOTE(review): this presumes log_linspace is logarithmically spaced so
    // that the middle point equals the current hyper value -- confirm.
    int N_grid = 11;
    double test_scale = 10;
    ccm.get_suffstats(count, sum_x, sum_x_sq);
    ccm.get_hyper_doubles(r, nu, s, mu);
    double score_0 = ccm.calc_marginal_logp();
    vector<double> hyper_grid;
    vector<double> hyper_conditionals;
    double curr_hyper_conditional_in_grid;
    //
    //    test 'r' hyper
    cout << "testing r conditionals" << endl;
    hyper_grid = log_linspace(r / test_scale, r * test_scale, N_grid);
    hyper_conditionals = ccm.calc_hyper_conditionals("r", hyper_grid);
    cout << "r_grid from function: " << hyper_grid << endl;
    cout << "r_conditioanls from function: " << hyper_conditionals << endl;
    curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
    cout << "curr r conditional in grid: " << curr_hyper_conditional_in_grid <<
         endl;
    assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));
    //
    //    test 'nu' hyper
    cout << "testing nu conditionals" << endl;
    hyper_grid = log_linspace(nu / test_scale, nu * test_scale, N_grid);
    hyper_conditionals = ccm.calc_hyper_conditionals("nu", hyper_grid);
    cout << "nu_grid: " << hyper_grid << endl;
    cout << "nu_conditionals: " << hyper_conditionals << endl;
    curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
    cout << "curr nu conditional in grid: " << curr_hyper_conditional_in_grid <<
         endl;
    assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));
    //
    //    test 's' hyper
    cout << "testing s conditionals" << endl;
    hyper_grid = log_linspace(s / test_scale, s * test_scale, N_grid);
    hyper_conditionals = ccm.calc_hyper_conditionals("s", hyper_grid);
    cout << "s_grid: " << hyper_grid << endl;
    cout << "s_conditionals: " << hyper_conditionals << endl;
    curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
    cout << "curr s conditional in grid: " << curr_hyper_conditional_in_grid <<
         endl;
    assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));
    //
    //    test 'mu' hyper
    cout << "testing mu conditionals" << endl;
    hyper_grid = log_linspace(mu / test_scale, mu * test_scale, N_grid);
    hyper_conditionals = ccm.calc_hyper_conditionals("mu", hyper_grid);
    cout << "mu_grid: " << hyper_grid << endl;
    cout << "mu_conditionals: " << hyper_conditionals << endl;
    curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2];
    cout << "curr  mu conditional in grid: " << curr_hyper_conditional_in_grid <<
         endl;
    assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision));

    // remove data from component model in SHUFFLED order
    remove_elements(ccm, values_to_test_shuffled);
    cout << endl << "component model after removal of data in shuffled order" <<
         endl;
    cout << ccm << endl;

    // Test marginal_logp and predictive_logp analytically
    //
    // The same `hypers` map is reused: "r", "nu" and "s" are overwritten.
    // NOTE(review): the last assignment uses the key "m", not "mu".  It adds
    // a new "m" entry and leaves hypers["mu"] at mu0, the randomly drawn
    // value from above.  Confirm which key CCM reads and whether the expected
    // values below were derived with mu = 13 or with mu0 before changing it.
    hypers["r"] = 9;
    hypers["nu"] = 17;
    hypers["s"] = 15;
    hypers["m"] = 13;
    CCM ccm2(hypers);
    values_to_test.clear();
    values_to_test.push_back(7);
    values_to_test.push_back(4);
    values_to_test.push_back(3);
    values_to_test.push_back(2);
    insert_elements(ccm2, values_to_test);
    // hard-coded expected log probabilities for the four values inserted above
    assert(is_almost(ccm2.calc_marginal_logp(), -34.2990812968, precision));
    assert(is_almost(ccm2.calc_element_predictive_logp(7), -2.73018549043,
                     precision));
    assert(is_almost(ccm2.calc_element_predictive_logp(4), -3.74794102225,
                     precision));
    assert(is_almost(ccm2.calc_element_predictive_logp(3), -4.18966316516,
                     precision));
    assert(is_almost(ccm2.calc_element_predictive_logp(2), -4.67271754595,
                     precision));

    cout << "Stop:: test_component model" << endl;
}