// Checks rng.next() for uniformity: draws NSAMPLES values, bins each one by
// floor(value * PSI_DF), and asserts that psi_test() accepts the resulting
// counts against a flat distribution over the PSI_DF bins.
// Assumes rng.next() yields values in [0, 1) so that every bin index is
// below PSI_DF -- TODO confirm against RandomNumberGenerator.
static void test_uniform01(RandomNumberGenerator &rng)
{
    // One counter per bin, value-initialised to zero.
    vector<size_t> counts(PSI_DF);
    // Every bin carries the same expected probability, 1/PSI_DF.
    vector<double> probabilities(PSI_DF, 1 / static_cast<double>(PSI_DF));

    for (size_t sample = 0; sample < NSAMPLES; ++sample) {
        const size_t bin = static_cast<size_t>(floor(rng.next() * PSI_DF));
        ++counts[bin];
    }

    assert(psi_test(counts, probabilities, NSAMPLES));
}
// Builds a vector of counts by making num_datum sequential draws.
//
// For each draw, one value is taken from rng.next() and handed to
// numerics::crp_draw_sample() together with the counts so far, the number of
// draws already made, and alpha; the returned draw is then folded into the
// counts with insert_into_counts().
//
// Returns the counts vector after all draws (empty when num_datum <= 0).
vector<int> draw_crp_init_counts(int num_datum, double alpha, RandomNumberGenerator &rng)
{
    vector<int> counts;
    int num_drawn = 0;  // how many draws have been made so far
    for (int remaining = num_datum; remaining > 0; --remaining) {
        double uniform_draw = rng.next();
        int which = numerics::crp_draw_sample(counts, num_drawn, alpha, uniform_draw);
        ++num_drawn;
        insert_into_counts(which, counts);
    }
    return counts;
}
int main(int argc, char** argv) { cout << "Begin:: test_cluster" << endl; RandomNumberGenerator rng; // set some test sizing parameters int max_value = 20; int num_rows = 3; int num_cols = 3; // create the objects map<int, map<string, double> > hypers_m; for (int i = 0; i < num_cols; i++) { hypers_m[i] = create_default_hypers(); } vector<map<string, double>*> hypers_v; map<int, map<string, double> >::iterator hm_it; for (hm_it = hypers_m.begin(); hm_it != hypers_m.end(); hm_it++) { int key = hm_it->first; map<string, double>& hypers = hm_it->second; hypers_v.push_back(&hypers); cout << "hypers_" << key << ": " << hypers << endl; } cout << "hypers_v: " << hypers_v << endl; Cluster cd(hypers_v); vector<ComponentModel*> p_cm_v; for (int col_idx = 0; col_idx < num_cols; col_idx++) { ContinuousComponentModel *p_cm = new ContinuousComponentModel( *hypers_v[col_idx]); p_cm_v.push_back(p_cm); } // print the empty cluster cout << endl << endl << "begin empty cluster print" << endl; cout << cd << endl; cout << "end empty cluster print" << endl << endl << endl; // generate random data; vector<vector<double> > rows; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double random_value = (rng.nexti(max_value) + 1) * rng.next(); row_data.push_back(random_value); } rows.push_back(row_data); } // poplute the objects cout << "Populating objects" << endl; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data = rows[row_idx]; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double random_value = rows[row_idx][col_idx]; p_cm_v[col_idx]->insert_element(random_value); } cd.insert_row(row_data, row_idx); } // test score equivalence vector<double> score_v; double sum_scores = 0; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double suff_score = p_cm_v[col_idx]->calc_marginal_logp(); score_v.push_back(suff_score); sum_scores += suff_score; } cout << "vector of 
separate suffstats scores after population: "; cout << score_v << endl; cout << "sum separate scores: " << sum_scores << endl; cout << "Cluster score with same data: " << cd.calc_sum_marginal_logps() << endl; cout << endl; // assert(is_almost(sum_scores, cd.calc_sum_marginal_logps(), 1E-10)); // test hypers for (int which_col = 0; which_col < num_cols; which_col++) { int N_grid = 11; double test_scale = 10; ContinuousComponentModel *p_ccm_i = dynamic_cast<ContinuousComponentModel*> (cd.p_model_v[which_col]); double r, nu, s, mu; double precision = 1E-10; p_ccm_i->get_hyper_doubles(r, nu, s, mu); double score_0 = p_ccm_i->calc_marginal_logp(); vector<double> hyper_grid; vector<double> hyper_conditionals; double curr_hyper_conditional_in_grid; // // test 'r' hyper cout << "testing r conditionals" << endl; hyper_grid = log_linspace(r / test_scale, r * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "r", hyper_grid); cout << "r_grid from function: " << hyper_grid << endl; cout << "r_conditioanls from function: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr r conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // map<string, double> &hypers = cd.get_hypers_i(which_col); map<string, double>& hypers = *(*cd.p_model_v[which_col]).p_hypers; double prior_r = hypers["r"]; double new_r = hyper_grid[0]; // cout << endl << "testing incorporate hyper update" << endl; cout << "new r: " << new_r << endl; hypers["r"] = new_r; cd.incorporate_hyper_update(which_col); cout << "marginal logp with new r: " << cd.p_model_v[which_col]->calc_marginal_logp() << endl; // cout << "changing r back to: " << prior_r << endl; hypers["r"] = prior_r; cd.incorporate_hyper_update(which_col); cout << "marginal logp with prior r: " << cd.p_model_v[which_col]->calc_marginal_logp() << endl; cout << "done testing 
incorporate hyper update on col" << endl << endl; // // test 'nu' hyper cout << "testing nu conditionals" << endl; hyper_grid = log_linspace(nu / test_scale, nu * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "nu", hyper_grid); cout << "nu_grid: " << hyper_grid << endl; cout << "nu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr nu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 's' hyper cout << "testing s conditionals" << endl; hyper_grid = log_linspace(s / test_scale, s * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "s", hyper_grid); cout << "s_grid: " << hyper_grid << endl; cout << "s_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr s conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 'mu' hyper cout << "testing mu conditionals" << endl; hyper_grid = log_linspace(mu / test_scale, mu * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "mu", hyper_grid); cout << "mu_grid: " << hyper_grid << endl; cout << "mu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr mu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); } // depopulate the objects cout << "De-populating objects" << endl; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data = rows[row_idx]; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double random_value = rows[row_idx][col_idx]; p_cm_v[col_idx]->remove_element(random_value); } 
cd.remove_row(row_data, row_idx); } // test score equivalence score_v.clear(); sum_scores = 0; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double suff_score = p_cm_v[col_idx]->calc_marginal_logp(); score_v.push_back(suff_score); sum_scores += suff_score; } cout << "vector of separate suffstats scores after depopulation: "; cout << score_v << endl; cout << "sum separate scores: " << sum_scores << endl; cout << "Cluster score with same data: " << cd.calc_sum_marginal_logps() << endl; cout << endl; // assert(is_almost(sum_scores, cd.calc_sum_marginal_logps(), 1E-10)); // test ability to remove columns // // poplute the cluster object cout << "Populating objects" << endl; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data = rows[row_idx]; cd.insert_row(row_data, row_idx); } cout << "cluster after population" << endl; cout << cd << endl; // // depopulate columns one by one while (cd.get_num_cols() > 0) { int col_idx = cd.get_num_cols() - 1; cout << "removing column: " << col_idx << endl; cd.remove_col(col_idx); cout << "removed column: " << col_idx << endl; cout << "cluster now looks like: " << endl; cout << cd << endl; } while (p_cm_v.size() != 0) { ComponentModel *p_cm = p_cm_v.back(); delete p_cm; p_cm_v.pop_back(); } cout << "Stop:: test_cluster" << endl; }
int main(int argc, char** argv) { cout << endl << "Begin:: test_continuous_component_model" << endl; RandomNumberGenerator rng; // test settings int max_randi = 30; int num_values_to_test = 10; double precision = 1E-10; // generate all the random data to use // // initial parameters double r0 = rng.nexti(max_randi) * rng.next(); double nu0 = rng.nexti(max_randi) * rng.next(); double s0 = rng.nexti(max_randi) * rng.next(); double mu0 = rng.nexti(max_randi) * rng.next(); // // elements to add vector<double> values_to_test; for (int i = 0; i < num_values_to_test; i++) { double rand_value = rng.nexti(max_randi) * rng.next(); values_to_test.push_back(rand_value); } // remove in a reversed order and a different order vector<double> values_to_test_reversed = values_to_test; std::reverse(values_to_test_reversed.begin(), values_to_test_reversed.end()); vector<double> values_to_test_shuffled = values_to_test; random_shuffle(values_to_test_shuffled.begin(), values_to_test_shuffled.end(), rng); // print generated values // cout << endl << "initial parameters: " << "\t"; cout << "r0: " << r0 << "\t"; cout << "nu0: " << nu0 << "\t"; cout << "s0: " << s0 << "\t"; cout << "mu0: " << mu0 << endl; cout << "values_to_test: " << values_to_test << endl; cout << "values_to_test_shuffled: " << values_to_test_shuffled << endl; // FIXME: should compare with a fixed dataset with known // post-update hyper values and score // FIXME: should be manually calling numerics:: functions // to compare component models results with // create the component model object // // r, nu, s, mu map<string, double> hypers; hypers["r"] = r0; hypers["nu"] = nu0; hypers["s"] = s0; hypers["mu"] = mu0; CCM ccm(hypers); cout << endl << "initial component model object" << endl; cout << ccm << endl; // verify initial parameters // int count; double sum_x, sum_x_sq; double r, nu, s, mu; ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); assert(count == 0); assert(is_almost(sum_x, 0, 
precision)); assert(is_almost(sum_x_sq, 0, precision)); assert(is_almost(r, r0, precision)); assert(is_almost(nu, nu0, precision)); assert(is_almost(s, s0, precision)); assert(is_almost(mu, mu0, precision)); assert(is_almost(ccm.calc_marginal_logp(), 0, precision)); // push data into component model insert_elements(ccm, values_to_test); cout << endl << "component model after insertion of data" << endl; cout << ccm << endl; // ensure count is proper assert(ccm.get_count() == num_values_to_test); // remove data from component model in REVERSED order remove_elements(ccm, values_to_test_reversed); cout << endl << "component model after removal of data in reversed order" << endl; cout << ccm << endl; // ensure initial values are recovered ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); assert(count == 0); assert(is_almost(sum_x, 0, precision)); assert(is_almost(sum_x_sq, 0, precision)); assert(is_almost(r, r0, precision)); assert(is_almost(nu, nu0, precision)); assert(is_almost(s, s0, precision)); assert(is_almost(mu, mu0, precision)); assert(is_almost(ccm.calc_marginal_logp(), 0, precision)); // push data into component model insert_elements(ccm, values_to_test); cout << endl << "component model after insertion of data" << endl; cout << ccm << endl; // ensure count is proper assert(ccm.get_count() == num_values_to_test); // remove data from component model in SHUFFLED order remove_elements(ccm, values_to_test_shuffled); cout << endl << "component model after removal of data in shuffled order" << endl; cout << ccm << endl; // ensure initial values are recovered ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); assert(count == 0); assert(is_almost(sum_x, 0, precision)); assert(is_almost(sum_x_sq, 0, precision)); assert(is_almost(r, r0, precision)); assert(is_almost(nu, nu0, precision)); assert(is_almost(s, s0, precision)); assert(is_almost(mu, mu0, precision)); assert(is_almost(ccm.calc_marginal_logp(), 0, 
precision)); // push data into component model insert_elements(ccm, values_to_test); cout << endl << "component model after insertion of data" << endl; cout << ccm << endl; // test hypers int N_grid = 11; double test_scale = 10; ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); double score_0 = ccm.calc_marginal_logp(); vector<double> hyper_grid; vector<double> hyper_conditionals; double curr_hyper_conditional_in_grid; // // test 'r' hyper cout << "testing r conditionals" << endl; hyper_grid = log_linspace(r / test_scale, r * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("r", hyper_grid); cout << "r_grid from function: " << hyper_grid << endl; cout << "r_conditioanls from function: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr r conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 'nu' hyper cout << "testing nu conditionals" << endl; hyper_grid = log_linspace(nu / test_scale, nu * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("nu", hyper_grid); cout << "nu_grid: " << hyper_grid << endl; cout << "nu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr nu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 's' hyper cout << "testing s conditionals" << endl; hyper_grid = log_linspace(s / test_scale, s * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("s", hyper_grid); cout << "s_grid: " << hyper_grid << endl; cout << "s_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr s conditional in grid: " << curr_hyper_conditional_in_grid << endl; 
assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 'mu' hyper cout << "testing mu conditionals" << endl; hyper_grid = log_linspace(mu / test_scale, mu * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("mu", hyper_grid); cout << "mu_grid: " << hyper_grid << endl; cout << "mu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr mu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // remove data from component model in SHUFFLED order remove_elements(ccm, values_to_test_shuffled); cout << endl << "component model after removal of data in shuffled order" << endl; cout << ccm << endl; // Test marginal_logp and predictive_logp analytically hypers["r"] = 9; hypers["nu"] = 17; hypers["s"] = 15; hypers["m"] = 13; CCM ccm2(hypers); values_to_test.clear(); values_to_test.push_back(7); values_to_test.push_back(4); values_to_test.push_back(3); values_to_test.push_back(2); insert_elements(ccm2, values_to_test); assert(is_almost(ccm2.calc_marginal_logp(), -34.2990812968, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(7), -2.73018549043, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(4), -3.74794102225, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(3), -4.18966316516, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(2), -4.67271754595, precision)); cout << "Stop:: test_component model" << endl; }