/*
 * Test driver: runs the test case named by the single command-line argument
 * and reports the outcome through the process exit status.
 *
 *   "basic1", "equal_elements" -> EXIT_SUCCESS when the test function yields
 *                                 a true value, EXIT_FAILURE otherwise
 *   "stress_test"              -> runs the test, then returns EXIT_FAILURE
 *   anything else              -> prints "No such test" to stderr and
 *                                 returns EXIT_FAILURE
 *
 * Returns 1 without running anything unless exactly one argument is given.
 */
int main(int argc, char **argv) {
    /* Exactly one argument (the test name) is expected */
    if (argc != 2) {
        return 1;
    }

    const std::string test_name(argv[1]);

    /* exit() does not return, so the checks below are mutually exclusive */
    if (test_name == "basic1") {
        const int status = basic1() ? EXIT_SUCCESS : EXIT_FAILURE;
        exit(status);
    }

    if (test_name == "equal_elements") {
        const int status = equal_elements() ? EXIT_SUCCESS : EXIT_FAILURE;
        exit(status);
    }

    if (test_name == "stress_test") {
        /*
         * NOTE(review): the result of the stress test is not inspected and
         * the run is always reported as a failure -- confirm this is
         * intended.
         */
        stress_test();
        return EXIT_FAILURE;
    }

    std::cerr << "No such test" << std::endl;
    return EXIT_FAILURE;
}
/*
 * Predict the class of the given sample.
 *
 * The per-class probabilities are obtained from predict_probability().
 *
 * Returns:
 *   -2  if equal_elements() reports that all probabilities are the same,
 *   -1  if no probability is strictly greater than zero,
 *   otherwise the index of the largest probability (the first such index
 *   when several entries share the maximum).
 */
int MondrianForest::predict_class(Sample& sample) {
    mondrian_confidence m_conf;
    arma::fvec pred_prob = predict_probability(sample, m_conf);

    /* No class stands out -> signal a tie */
    if (equal_elements(pred_prob)) {
        return -2;
    }

    /* Arg-max; only entries strictly above the running best (initially 0) count */
    int best_class = -1;
    float best_prob = 0.;
    const int num_probs = int(pred_prob.size());
    for (int k = 0; k < num_probs; ++k) {
        if (pred_prob[k] > best_prob) {
            best_prob = pred_prob[k];
            best_class = k;
        }
    }

    return best_class;
}
/* * Predict class of current sample */ int MondrianNode::predict_class(Sample& sample, arma::fvec& pred_prob, float& prob_not_separated_yet, mondrian_confidence& m_conf) { if (settings_->debug) cout << "predict_class..." << endl; int pred_class = -1; /* * If x lies outside B^x_j at node j, the probability that x will branch * off into its own node at node j, denoted by p^s_j(x), is equal to the * probability that a split exists in B_j outside B^x_j */ int feature_dimension = mondrian_block_->get_feature_dim(); arma::fvec zero_vec(feature_dimension, arma::fill::zeros); /* \eta_j(x) */ float expo_param = 1.0; expo_param = arma::accu(arma::max(zero_vec, (sample.x - mondrian_block_->get_max_block_dim()))) + arma::accu(arma::max(zero_vec, (mondrian_block_->get_min_block_dim() - sample.x))); /* Compute mondrian confidence values */ if (is_leaf_) { /* 1. Compute euclidean distance */ m_conf.distance = arma::norm(arma::max(zero_vec, (sample.x - mondrian_block_->get_max_block_dim())),2) + arma::norm(arma::max(zero_vec, (mondrian_block_->get_min_block_dim() - sample.x)),2); /* 2. Get number of samples at current node */ m_conf.number_of_points = arma::accu(id_parent_node_->count_labels_); /* 3. 
Calculate densitiy of current mondrian block */ //arma::fvec tmp_vec = id_parent_node_->mondrian_block_->get_max_block_dim() - // id_parent_node_->mondrian_block_->get_min_block_dim(); //arma::fvec tmp_vec = mondrian_block_->get_max_block_dim() - mondrian_block_->get_min_block_dim(); m_conf.density = expo_param; } /* Probability that x_i will branch off into its own node at node j */ float prob_not_separated_now = exp(-expo_param * max_split_costs_); float prob_separated_now = 1 - prob_not_separated_now; /* p^s_j(x) */ if (settings_->debug) { cout << "prob_not_separated_now: " << prob_not_separated_now << endl; cout << "prob_separated_now: " << prob_separated_now << endl; } arma::fvec base = get_prior_mean(); float discount = exp(-settings_->discount_param * max_split_costs_); if (settings_->debug) cout << "discount: " << discount << endl; /* Interpolated Kneser Ney smoothing */ arma::Col<arma::uword> cnt(*num_classes_, arma::fill::zeros); if (is_leaf_) { cnt = count_labels_; } else { arma::Col<arma::uword> ones_vec(*num_classes_, arma::fill::ones); cnt = arma::min(count_labels_, ones_vec); } /* Check if current sample lies outside */ // or expo_param > 0 if (greater_zero(expo_param)) { /* * Compute expected discount d, where \delta is drawn from a truncated * expoential with rate \eta_j(x), truncated to the interval * [0, \delta] */ arma::fvec cnt_f = arma::conv_to<arma::fvec>::from(cnt); arma::fvec ones_vec(cnt_f.size(),arma::fill::ones); arma::fvec num_tables_k = arma::min(cnt_f, ones_vec); float num_customers = float(arma::sum(cnt)); float num_tables = float(arma::sum(num_tables_k)); /* * Expected discount is averaging over time of cut which is * a truncated exponential */ discount = (expo_param / (expo_param + settings_->discount_param)) * (-(exp(-(expo_param + settings_->discount_param) * max_split_costs_) - 1)) / (-(exp(-expo_param * max_split_costs_)-1)); float discount_per_num_customers = discount / num_customers; arma::fvec pred_prob_tmp = (num_tables * 
discount_per_num_customers * base) + (cnt_f / num_customers) - (discount_per_num_customers * num_tables_k); pred_prob += prob_separated_now * prob_not_separated_yet * pred_prob_tmp; prob_not_separated_yet *= prob_not_separated_now; } /* c_j,k: number of customers at restaurant j eating dish k */ /* Compute posterior mean normalized stable */ if (!is_leaf_) { if (equal(sample.x[split_dim_],split_loc_) || sample.x[split_dim_] < split_loc_) { if (settings_->debug) cout << "left" << endl; pred_class = id_left_child_node_->predict_class(sample, pred_prob, prob_not_separated_yet, m_conf); } else { if (settings_->debug) cout << "right" << endl; pred_class = id_right_child_node_->predict_class(sample, pred_prob, prob_not_separated_yet, m_conf); } } else if (is_leaf_ && greater_zero(expo_param) == false) { pred_prob = compute_posterior_mean_normalized_stable( cnt, discount, base) * prob_not_separated_yet; } /* Get class with highest probability */ /* Check if all classes have same probability -> return -2 */ if (equal_elements(pred_prob)) return -2; float tmp_value = 0.; for (int i = 0; i < int(pred_prob.size()); i++) { if (pred_prob[i] > tmp_value) { tmp_value = pred_prob[i]; pred_class = i; } } return pred_class; }