// Checks rng.nexti() against a uniform distribution over PSI_DF buckets.
//
// First, NSAMPLES draws of rng.nexti(PSI_DF) are tallied and psi_test must
// accept the tallies against equal bucket probabilities.  Second, as a
// negative control, draws from a range of 2*PSI_DF + 1 values are folded into
// PSI_DF buckets with '%' (which introduces modulo bias) and psi_test must
// reject them, showing the test has enough statistical power to notice.
static void test_uniform_integer(RandomNumberGenerator &rng) {
    const double uniform_mass = 1 / static_cast<double>(PSI_DF);
    vector<double> probabilities(PSI_DF, uniform_mass);
    vector<size_t> counts(PSI_DF);

    // Unbiased draws: every bucket is expected to be hit equally often.
    for (size_t draw = 0; draw < NSAMPLES; ++draw) {
        ++counts[rng.nexti(PSI_DF)];
    }
    assert(psi_test(counts, probabilities, NSAMPLES));

    // Deliberately biased draws: reset the tallies and fold a wider range
    // into the same buckets.
    std::fill(counts.begin(), counts.end(), 0);
    for (size_t draw = 0; draw < NSAMPLES; ++draw) {
        ++counts[rng.nexti(2 * PSI_DF + 1) % PSI_DF];
    }
    assert(!psi_test(counts, probabilities, NSAMPLES));
}
int main(int argc, char** argv) { cout << "Begin:: test_cluster" << endl; RandomNumberGenerator rng; // set some test sizing parameters int max_value = 20; int num_rows = 3; int num_cols = 3; // create the objects map<int, map<string, double> > hypers_m; for (int i = 0; i < num_cols; i++) { hypers_m[i] = create_default_hypers(); } vector<map<string, double>*> hypers_v; map<int, map<string, double> >::iterator hm_it; for (hm_it = hypers_m.begin(); hm_it != hypers_m.end(); hm_it++) { int key = hm_it->first; map<string, double>& hypers = hm_it->second; hypers_v.push_back(&hypers); cout << "hypers_" << key << ": " << hypers << endl; } cout << "hypers_v: " << hypers_v << endl; Cluster cd(hypers_v); vector<ComponentModel*> p_cm_v; for (int col_idx = 0; col_idx < num_cols; col_idx++) { ContinuousComponentModel *p_cm = new ContinuousComponentModel( *hypers_v[col_idx]); p_cm_v.push_back(p_cm); } // print the empty cluster cout << endl << endl << "begin empty cluster print" << endl; cout << cd << endl; cout << "end empty cluster print" << endl << endl << endl; // generate random data; vector<vector<double> > rows; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double random_value = (rng.nexti(max_value) + 1) * rng.next(); row_data.push_back(random_value); } rows.push_back(row_data); } // poplute the objects cout << "Populating objects" << endl; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data = rows[row_idx]; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double random_value = rows[row_idx][col_idx]; p_cm_v[col_idx]->insert_element(random_value); } cd.insert_row(row_data, row_idx); } // test score equivalence vector<double> score_v; double sum_scores = 0; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double suff_score = p_cm_v[col_idx]->calc_marginal_logp(); score_v.push_back(suff_score); sum_scores += suff_score; } cout << "vector of 
separate suffstats scores after population: "; cout << score_v << endl; cout << "sum separate scores: " << sum_scores << endl; cout << "Cluster score with same data: " << cd.calc_sum_marginal_logps() << endl; cout << endl; // assert(is_almost(sum_scores, cd.calc_sum_marginal_logps(), 1E-10)); // test hypers for (int which_col = 0; which_col < num_cols; which_col++) { int N_grid = 11; double test_scale = 10; ContinuousComponentModel *p_ccm_i = dynamic_cast<ContinuousComponentModel*> (cd.p_model_v[which_col]); double r, nu, s, mu; double precision = 1E-10; p_ccm_i->get_hyper_doubles(r, nu, s, mu); double score_0 = p_ccm_i->calc_marginal_logp(); vector<double> hyper_grid; vector<double> hyper_conditionals; double curr_hyper_conditional_in_grid; // // test 'r' hyper cout << "testing r conditionals" << endl; hyper_grid = log_linspace(r / test_scale, r * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "r", hyper_grid); cout << "r_grid from function: " << hyper_grid << endl; cout << "r_conditioanls from function: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr r conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // map<string, double> &hypers = cd.get_hypers_i(which_col); map<string, double>& hypers = *(*cd.p_model_v[which_col]).p_hypers; double prior_r = hypers["r"]; double new_r = hyper_grid[0]; // cout << endl << "testing incorporate hyper update" << endl; cout << "new r: " << new_r << endl; hypers["r"] = new_r; cd.incorporate_hyper_update(which_col); cout << "marginal logp with new r: " << cd.p_model_v[which_col]->calc_marginal_logp() << endl; // cout << "changing r back to: " << prior_r << endl; hypers["r"] = prior_r; cd.incorporate_hyper_update(which_col); cout << "marginal logp with prior r: " << cd.p_model_v[which_col]->calc_marginal_logp() << endl; cout << "done testing 
incorporate hyper update on col" << endl << endl; // // test 'nu' hyper cout << "testing nu conditionals" << endl; hyper_grid = log_linspace(nu / test_scale, nu * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "nu", hyper_grid); cout << "nu_grid: " << hyper_grid << endl; cout << "nu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr nu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 's' hyper cout << "testing s conditionals" << endl; hyper_grid = log_linspace(s / test_scale, s * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "s", hyper_grid); cout << "s_grid: " << hyper_grid << endl; cout << "s_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr s conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 'mu' hyper cout << "testing mu conditionals" << endl; hyper_grid = log_linspace(mu / test_scale, mu * test_scale, N_grid); hyper_conditionals = cd.calc_hyper_conditionals(which_col, "mu", hyper_grid); cout << "mu_grid: " << hyper_grid << endl; cout << "mu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr mu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); } // depopulate the objects cout << "De-populating objects" << endl; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data = rows[row_idx]; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double random_value = rows[row_idx][col_idx]; p_cm_v[col_idx]->remove_element(random_value); } 
cd.remove_row(row_data, row_idx); } // test score equivalence score_v.clear(); sum_scores = 0; for (int col_idx = 0; col_idx < num_cols; col_idx++) { double suff_score = p_cm_v[col_idx]->calc_marginal_logp(); score_v.push_back(suff_score); sum_scores += suff_score; } cout << "vector of separate suffstats scores after depopulation: "; cout << score_v << endl; cout << "sum separate scores: " << sum_scores << endl; cout << "Cluster score with same data: " << cd.calc_sum_marginal_logps() << endl; cout << endl; // assert(is_almost(sum_scores, cd.calc_sum_marginal_logps(), 1E-10)); // test ability to remove columns // // poplute the cluster object cout << "Populating objects" << endl; for (int row_idx = 0; row_idx < num_rows; row_idx++) { vector<double> row_data = rows[row_idx]; cd.insert_row(row_data, row_idx); } cout << "cluster after population" << endl; cout << cd << endl; // // depopulate columns one by one while (cd.get_num_cols() > 0) { int col_idx = cd.get_num_cols() - 1; cout << "removing column: " << col_idx << endl; cd.remove_col(col_idx); cout << "removed column: " << col_idx << endl; cout << "cluster now looks like: " << endl; cout << cd << endl; } while (p_cm_v.size() != 0) { ComponentModel *p_cm = p_cm_v.back(); delete p_cm; p_cm_v.pop_back(); } cout << "Stop:: test_cluster" << endl; }
int main(int argc, char** argv) { cout << endl << "Begin:: test_continuous_component_model" << endl; RandomNumberGenerator rng; // test settings int max_randi = 30; int num_values_to_test = 10; double precision = 1E-10; // generate all the random data to use // // initial parameters double r0 = rng.nexti(max_randi) * rng.next(); double nu0 = rng.nexti(max_randi) * rng.next(); double s0 = rng.nexti(max_randi) * rng.next(); double mu0 = rng.nexti(max_randi) * rng.next(); // // elements to add vector<double> values_to_test; for (int i = 0; i < num_values_to_test; i++) { double rand_value = rng.nexti(max_randi) * rng.next(); values_to_test.push_back(rand_value); } // remove in a reversed order and a different order vector<double> values_to_test_reversed = values_to_test; std::reverse(values_to_test_reversed.begin(), values_to_test_reversed.end()); vector<double> values_to_test_shuffled = values_to_test; random_shuffle(values_to_test_shuffled.begin(), values_to_test_shuffled.end(), rng); // print generated values // cout << endl << "initial parameters: " << "\t"; cout << "r0: " << r0 << "\t"; cout << "nu0: " << nu0 << "\t"; cout << "s0: " << s0 << "\t"; cout << "mu0: " << mu0 << endl; cout << "values_to_test: " << values_to_test << endl; cout << "values_to_test_shuffled: " << values_to_test_shuffled << endl; // FIXME: should compare with a fixed dataset with known // post-update hyper values and score // FIXME: should be manually calling numerics:: functions // to compare component models results with // create the component model object // // r, nu, s, mu map<string, double> hypers; hypers["r"] = r0; hypers["nu"] = nu0; hypers["s"] = s0; hypers["mu"] = mu0; CCM ccm(hypers); cout << endl << "initial component model object" << endl; cout << ccm << endl; // verify initial parameters // int count; double sum_x, sum_x_sq; double r, nu, s, mu; ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); assert(count == 0); assert(is_almost(sum_x, 0, 
precision)); assert(is_almost(sum_x_sq, 0, precision)); assert(is_almost(r, r0, precision)); assert(is_almost(nu, nu0, precision)); assert(is_almost(s, s0, precision)); assert(is_almost(mu, mu0, precision)); assert(is_almost(ccm.calc_marginal_logp(), 0, precision)); // push data into component model insert_elements(ccm, values_to_test); cout << endl << "component model after insertion of data" << endl; cout << ccm << endl; // ensure count is proper assert(ccm.get_count() == num_values_to_test); // remove data from component model in REVERSED order remove_elements(ccm, values_to_test_reversed); cout << endl << "component model after removal of data in reversed order" << endl; cout << ccm << endl; // ensure initial values are recovered ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); assert(count == 0); assert(is_almost(sum_x, 0, precision)); assert(is_almost(sum_x_sq, 0, precision)); assert(is_almost(r, r0, precision)); assert(is_almost(nu, nu0, precision)); assert(is_almost(s, s0, precision)); assert(is_almost(mu, mu0, precision)); assert(is_almost(ccm.calc_marginal_logp(), 0, precision)); // push data into component model insert_elements(ccm, values_to_test); cout << endl << "component model after insertion of data" << endl; cout << ccm << endl; // ensure count is proper assert(ccm.get_count() == num_values_to_test); // remove data from component model in SHUFFLED order remove_elements(ccm, values_to_test_shuffled); cout << endl << "component model after removal of data in shuffled order" << endl; cout << ccm << endl; // ensure initial values are recovered ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); assert(count == 0); assert(is_almost(sum_x, 0, precision)); assert(is_almost(sum_x_sq, 0, precision)); assert(is_almost(r, r0, precision)); assert(is_almost(nu, nu0, precision)); assert(is_almost(s, s0, precision)); assert(is_almost(mu, mu0, precision)); assert(is_almost(ccm.calc_marginal_logp(), 0, 
precision)); // push data into component model insert_elements(ccm, values_to_test); cout << endl << "component model after insertion of data" << endl; cout << ccm << endl; // test hypers int N_grid = 11; double test_scale = 10; ccm.get_suffstats(count, sum_x, sum_x_sq); ccm.get_hyper_doubles(r, nu, s, mu); double score_0 = ccm.calc_marginal_logp(); vector<double> hyper_grid; vector<double> hyper_conditionals; double curr_hyper_conditional_in_grid; // // test 'r' hyper cout << "testing r conditionals" << endl; hyper_grid = log_linspace(r / test_scale, r * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("r", hyper_grid); cout << "r_grid from function: " << hyper_grid << endl; cout << "r_conditioanls from function: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr r conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 'nu' hyper cout << "testing nu conditionals" << endl; hyper_grid = log_linspace(nu / test_scale, nu * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("nu", hyper_grid); cout << "nu_grid: " << hyper_grid << endl; cout << "nu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr nu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 's' hyper cout << "testing s conditionals" << endl; hyper_grid = log_linspace(s / test_scale, s * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("s", hyper_grid); cout << "s_grid: " << hyper_grid << endl; cout << "s_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr s conditional in grid: " << curr_hyper_conditional_in_grid << endl; 
assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // // test 'mu' hyper cout << "testing mu conditionals" << endl; hyper_grid = log_linspace(mu / test_scale, mu * test_scale, N_grid); hyper_conditionals = ccm.calc_hyper_conditionals("mu", hyper_grid); cout << "mu_grid: " << hyper_grid << endl; cout << "mu_conditionals: " << hyper_conditionals << endl; curr_hyper_conditional_in_grid = hyper_conditionals[(int)(N_grid - 1) / 2]; cout << "curr mu conditional in grid: " << curr_hyper_conditional_in_grid << endl; assert(is_almost(score_0, curr_hyper_conditional_in_grid, precision)); // remove data from component model in SHUFFLED order remove_elements(ccm, values_to_test_shuffled); cout << endl << "component model after removal of data in shuffled order" << endl; cout << ccm << endl; // Test marginal_logp and predictive_logp analytically hypers["r"] = 9; hypers["nu"] = 17; hypers["s"] = 15; hypers["m"] = 13; CCM ccm2(hypers); values_to_test.clear(); values_to_test.push_back(7); values_to_test.push_back(4); values_to_test.push_back(3); values_to_test.push_back(2); insert_elements(ccm2, values_to_test); assert(is_almost(ccm2.calc_marginal_logp(), -34.2990812968, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(7), -2.73018549043, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(4), -3.74794102225, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(3), -4.18966316516, precision)); assert(is_almost(ccm2.calc_element_predictive_logp(2), -4.67271754595, precision)); cout << "Stop:: test_component model" << endl; }
int main() { cout << endl << "Begin:: test_multinomial_component_model" << endl; RandomNumberGenerator rng; // test settings int NUM_BUCKETS = 5; int num_values_to_test = 30; double precision = 1E-10; map<string, double> hypers; // generate all the random data to use // // initial parameters vector<double> dirichlet_alphas_to_test; dirichlet_alphas_to_test.push_back(0.5); dirichlet_alphas_to_test.push_back(1.0); dirichlet_alphas_to_test.push_back(10.0); // // elements to add vector<double> values_to_test; for(int i=0; i<num_values_to_test; i++) { int rand_i = rng.nexti(NUM_BUCKETS); values_to_test.push_back(rand_i); } // cout << "values_to_test: " << values_to_test << endl; vector<double> values_to_test_reversed = values_to_test; std::reverse(values_to_test_reversed.begin(), values_to_test_reversed.end()); vector<double> values_to_test_shuffled = values_to_test; std::random_shuffle(values_to_test_shuffled.begin(), values_to_test_shuffled.end()); // print generated values // cout << endl << "initial parameters: " << "\t"; cout << "dirichlet_alphas_to_test: " << dirichlet_alphas_to_test << endl; cout << "values_to_test: " << values_to_test << endl; hypers["dirichlet_alpha"] = dirichlet_alphas_to_test[1]; hypers["K"] = NUM_BUCKETS; MCM mcm(hypers); cout << "calc_marginal_logp() on empty MultinomialComponentModel: "; cout << mcm.calc_marginal_logp() << endl; assert(is_almost(mcm.calc_marginal_logp(), 0, precision)); // cout << "test insertion and removal in same order" << endl; insert_elements(mcm, values_to_test); cout << mcm << endl; assert(is_almost(mcm.calc_marginal_logp(), -49.9364531937, precision)); assert(is_almost(mcm.calc_element_predictive_logp(0), log(3.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(1), log(7.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(2), log(10.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(3), log(6.0/35), precision)); 
assert(is_almost(mcm.calc_element_predictive_logp(4), log(9.0/35), precision)); remove_elements(mcm, values_to_test); cout << mcm << endl; // cout << "test insertion and removal in reversed order" << endl; insert_elements(mcm, values_to_test); cout << mcm << endl; assert(is_almost(mcm.calc_marginal_logp(), -49.9364531937, precision)); assert(is_almost(mcm.calc_element_predictive_logp(0), log(3.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(1), log(7.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(2), log(10.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(3), log(6.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(4), log(9.0/35), precision)); remove_elements(mcm, values_to_test_reversed); cout << mcm << endl; // cout << "test insertion and removal in shuffled order" << endl; insert_elements(mcm, values_to_test); cout << mcm << endl; assert(is_almost(mcm.calc_marginal_logp(), -49.9364531937, precision)); assert(is_almost(mcm.calc_element_predictive_logp(0), log(3.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(1), log(7.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(2), log(10.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(3), log(6.0/35), precision)); assert(is_almost(mcm.calc_element_predictive_logp(4), log(9.0/35), precision)); remove_elements(mcm, values_to_test_shuffled); cout << mcm << endl; cout << "test draws" << endl; cout << "inserting: " << values_to_test << endl; insert_elements(mcm, values_to_test); cout << mcm << endl; vector<double> draws; map<double, int> draw_counts; int num_draws = 10000; for(int i=0; i<num_draws; i++) { int rand_int = rng.nexti(); double draw = mcm.get_draw(rand_int); draws.push_back(draw); if(in(draw_counts, draw)) { draw_counts[draw]++; } else { draw_counts[draw] = 1; } } // cout << "draws are: " << draws << endl; cout << "draw_counts is: " << draw_counts << endl; cout << endl 
<< endl << "test constructor with sparse input" << endl; // elements to add values_to_test.clear(); map<string, double> counts_to_use; for(int i=0; i<NUM_BUCKETS; i++) { counts_to_use[stringify(i)] = 0.; } int ignore_value = 0; for(int i=0; i<num_values_to_test; i++) { int rand_i = rng.nexti(NUM_BUCKETS); if(rand_i==ignore_value) { continue; } values_to_test.push_back(rand_i); counts_to_use[stringify(rand_i)]++; } counts_to_use.erase(stringify(ignore_value)); // cout << "values_to_test: " << values_to_test << endl; cout << "counts_to_use: " << counts_to_use << endl; // print generated values // cout << endl << "initial parameters: " << endl; cout << "values_to_test: " << values_to_test << endl; hypers["dirichlet_alpha"] = 1.; hypers["K"] = NUM_BUCKETS; MCM mcm2(hypers, values_to_test.size(), counts_to_use); cout << "component model: " << mcm2 << endl; draws.clear(); draw_counts.clear(); num_draws = 10000; for(int i=0; i<num_draws; i++) { int rand_int = rng.nexti(); double draw = mcm2.get_draw(rand_int); draws.push_back(draw); if(in(draw_counts, draw)) { draw_counts[draw]++; } else { draw_counts[draw] = 1; } } // cout << "draws are: " << draws << endl; cout << "draw_counts is: " << draw_counts << endl; double sum_p = 0; for(int element=0; element<NUM_BUCKETS; element++) { double element_p = exp(mcm2.calc_element_predictive_logp(element)); } cout << endl << "End:: test_multinomial_component_model" << endl; }