Ejemplo n.º 1
0
/*
 * Test driver: runs the single test whose name is given as the only
 * command-line argument.
 *
 * Exit status:
 *   1             - wrong number of arguments
 *   EXIT_SUCCESS  - "basic1" / "equal_elements" returned a true value
 *   EXIT_FAILURE  - the selected test returned a false value, the name is
 *                   unknown, or "stress_test" ran to completion
 */
int main(int argc, char **argv) {
	/* Exactly one argument (the test name) is expected. */
	if (argc != 2) {
		return 1;
	}

	const std::string requested(argv[1]);

	if (requested == "basic1") {
		const int status = basic1() ? EXIT_SUCCESS : EXIT_FAILURE;
		exit(status);
	}

	if (requested == "equal_elements") {
		const int status = equal_elements() ? EXIT_SUCCESS : EXIT_FAILURE;
		exit(status);
	}

	if (requested == "stress_test") {
		stress_test();
		/* NOTE(review): reports failure even when stress_test() returns
		 * normally - confirm this is intended. */
		return EXIT_FAILURE;
	}

	/* Unknown test name. */
	std::cerr << "No such test" << std::endl;
	return EXIT_FAILURE;
}
/*
 * Predict the class of the given sample.
 *
 * Obtains the class probabilities from predict_probability() and returns
 * the index of the largest one.
 *
 * Returns:
 *   -2  if equal_elements() reports that all probabilities are the same,
 *   -1  if no probability is greater than 0,
 *   otherwise the index of the first largest probability.
 */
int MondrianForest::predict_class(Sample& sample) {
    /* Confidence values are filled in by predict_probability but not used here */
    mondrian_confidence confidence;
    arma::fvec class_probs = predict_probability(sample, confidence);

    /* No class stands out -> report "undecided" */
    if (equal_elements(class_probs))
        return -2;

    /* Arg-max over the probabilities; only values above 0 can win */
    int best_class = -1;
    float best_prob = 0.;
    const int num_probs = int(class_probs.size());
    for (int k = 0; k < num_probs; ++k) {
        const float candidate = class_probs[k];
        if (!(candidate > best_prob))
            continue;
        best_prob = candidate;
        best_class = k;
    }
    return best_class;
}
/*
 * Predict the class of the given sample, starting at this node.
 *
 * Adds this node's contribution to pred_prob, recurses into the left or
 * right child when this node is not a leaf, and finally returns the index
 * of the largest entry of pred_prob.
 *
 * Parameters:
 *   sample                 - sample whose feature vector sample.x is compared
 *                            with the bounds of this node's block and with
 *                            the split location.
 *   pred_prob              - in/out: probability vector shared along the
 *                            path; it is updated here and handed on to the
 *                            child call.
 *   prob_not_separated_yet - in/out: multiplied by this node's
 *                            prob_not_separated_now when greater_zero() is
 *                            true for the sample's distance to the block.
 *   m_conf                 - out: distance, number_of_points and density are
 *                            written when this node is a leaf.
 *
 * Returns:
 *   -2 when equal_elements(pred_prob) is true; otherwise the index of the
 *   first largest entry of pred_prob that is greater than 0. If no entry is
 *   greater than 0, the value already held in pred_class is returned (-1, or
 *   the result of the child call).
 */
int MondrianNode::predict_class(Sample& sample, arma::fvec& pred_prob, 
        float& prob_not_separated_yet, mondrian_confidence& m_conf) {

    if (settings_->debug)
        cout << "predict_class..." << endl;
    int pred_class = -1;
    /* 
     * If x lies outside B^x_j at node j, the probability that x will branch 
     * off into its own node at node j, denoted by p^s_j(x), is equal to the
     * probability that a split exists in B_j outside B^x_j 
     */
    int feature_dimension = mondrian_block_->get_feature_dim();
    arma::fvec zero_vec(feature_dimension, arma::fill::zeros);
    /*
     * \eta_j(x): summed per-dimension amount by which sample.x exceeds the
     * block's maximum plus the amount by which it falls below the block's
     * minimum (both clamped at zero). The initial value 1.0 is overwritten
     * immediately.
     */
    float expo_param = 1.0;
    expo_param = arma::accu(arma::max(zero_vec, 
                (sample.x - mondrian_block_->get_max_block_dim()))) + 
        arma::accu(arma::max(zero_vec, 
                    (mondrian_block_->get_min_block_dim() - sample.x)));
    /* Compute mondrian confidence values (only written at a leaf) */
    if (is_leaf_) {
        /*
         * 1. Distance: sum of the 2-norms of the two clamped excess vectors
         *    (above the block maximum and below the block minimum)
         */
        m_conf.distance = arma::norm(arma::max(zero_vec, 
                    (sample.x - mondrian_block_->get_max_block_dim())),2) + 
            arma::norm(arma::max(zero_vec, 
                        (mondrian_block_->get_min_block_dim() - sample.x)),2);
        /*
         * 2. Number of samples: sum of the label counts of the PARENT node.
         *    NOTE(review): id_parent_node_ is dereferenced without a null
         *    check - confirm it is always set for a leaf (e.g. a tree whose
         *    root is a leaf).
         */
        m_conf.number_of_points = arma::accu(id_parent_node_->count_labels_);
        /*
         * 3. Density of current mondrian block: currently just expo_param.
         *    The commented-out lines below appear to be earlier attempts
         *    based on the block extent.
         */
        //arma::fvec tmp_vec = id_parent_node_->mondrian_block_->get_max_block_dim() - 
        //   id_parent_node_->mondrian_block_->get_min_block_dim();
        //arma::fvec tmp_vec = mondrian_block_->get_max_block_dim() - mondrian_block_->get_min_block_dim();
        m_conf.density = expo_param;
    }
    /*
     * prob_not_separated_now = exp(-\eta_j(x) * max_split_costs_) is the
     * probability that x does NOT branch off at node j; its complement is
     * the probability that x branches off into its own node at node j.
     */
    float prob_not_separated_now = exp(-expo_param * max_split_costs_);
    float prob_separated_now = 1 - prob_not_separated_now;  /* p^s_j(x) */ 
    if (settings_->debug) {
        cout << "prob_not_separated_now: " << prob_not_separated_now << endl;
        cout << "prob_separated_now: " << prob_separated_now << endl;
    }
    /* Prior mean used as the base distribution in the formulas below */
    arma::fvec base = get_prior_mean();
    
    /* Default discount; replaced by the expected discount further down
     * when the sample lies outside the block */
    float discount = exp(-settings_->discount_param * max_split_costs_);
    
    if (settings_->debug)
        cout << "discount: " << discount << endl;
    /*
     * Interpolated Kneser Ney smoothing:
     * a leaf uses its label counts directly, an inner node caps every
     * count at 1 (element-wise minimum with a vector of ones).
     */
    arma::Col<arma::uword> cnt(*num_classes_, arma::fill::zeros);
    if (is_leaf_) {
        cnt = count_labels_;
    } else {
        arma::Col<arma::uword> ones_vec(*num_classes_, arma::fill::ones);
        cnt = arma::min(count_labels_, ones_vec);
    }

    /* Check if current sample lies outside the block */
    // or expo_param > 0 (greater_zero is defined elsewhere; presumably a
    // tolerant "> 0" test - confirm)
    if (greater_zero(expo_param)) {
        /* 
         * Compute expected discount d, where \delta is drawn from a truncated
         * exponential with rate \eta_j(x), truncated to the interval
         * [0, \delta]
         */
        arma::fvec cnt_f = arma::conv_to<arma::fvec>::from(cnt);
        arma::fvec ones_vec(cnt_f.size(),arma::fill::ones);
        /* Per-class table count: 1 where the class has any customer, else 0 */
        arma::fvec num_tables_k = arma::min(cnt_f, ones_vec);
        float num_customers = float(arma::sum(cnt));
        float num_tables = float(arma::sum(num_tables_k));
        
        /* 
         * Expected discount is averaging over time of cut which is
         * a truncated exponential
         */
        discount = (expo_param / (expo_param + settings_->discount_param)) *
            (-(exp(-(expo_param + settings_->discount_param) *
             max_split_costs_) - 1)) / 
            (-(exp(-expo_param * max_split_costs_)-1));

        /*
         * NOTE(review): num_customers is the sum of cnt; if it can be 0 the
         * divisions below produce inf/NaN - confirm every node reached here
         * has at least one counted label.
         */
        float discount_per_num_customers = discount / num_customers;
        arma::fvec pred_prob_tmp = (num_tables * discount_per_num_customers *
            base) + (cnt_f / num_customers) - (discount_per_num_customers * 
            num_tables_k);

        /* Add this node's share, weighted by the probability of separating
         * exactly here, then update the probability of reaching deeper nodes */
        pred_prob += prob_separated_now * prob_not_separated_yet * pred_prob_tmp;
        prob_not_separated_yet *= prob_not_separated_now;
    }
    /* c_j,k: number of customers at restaurant j eating dish k */     
    /* Compute posterior mean normalized stable */
    if (!is_leaf_) {
        /* Inner node: values at or below the split location go left,
         * everything else goes right */
        if (equal(sample.x[split_dim_],split_loc_) || sample.x[split_dim_]
                < split_loc_) {

            if (settings_->debug) 
                cout << "left" << endl;
            pred_class = id_left_child_node_->predict_class(sample, pred_prob,
                    prob_not_separated_yet, m_conf);
        } else {
            if (settings_->debug) 
                cout << "right" << endl;
            pred_class = id_right_child_node_->predict_class(sample, pred_prob,
                    prob_not_separated_yet, m_conf);
        }
    /*
     * Leaf with the sample inside the block: pred_prob is overwritten
     * (assigned, not accumulated) with the posterior mean scaled by
     * prob_not_separated_yet. The is_leaf_ test is redundant in this else
     * branch. For a leaf with the sample outside the block neither branch
     * runs, so pred_prob keeps only what was accumulated above.
     */
    } else if (is_leaf_ && greater_zero(expo_param) == false) {
        pred_prob = compute_posterior_mean_normalized_stable(
                cnt, discount, base) * prob_not_separated_yet;
    }
    /* Get class with highest probability */
    /* Check if all classes have same probability -> return -2 */
    if (equal_elements(pred_prob))
        return -2;
    /* tmp_value starts at 0, so only entries strictly greater than 0 can be
     * chosen; on ties the first index wins */
    float tmp_value = 0.;
    for (int i = 0; i < int(pred_prob.size()); i++) {
        if (pred_prob[i] > tmp_value) {
            tmp_value = pred_prob[i];
            pred_class = i;
        }
    }
    return pred_class;
}