// calculate percentile value_t percentile( double x ) const { assert( x >= 0 && x <= 1.0 ); if ( simple ) return 0; if ( data().empty() ) return 0; if ( !is_sorted ) return base_t::nan(); // Should be improved to use linear interpolation return *( sorted_data()[ ( int ) ( x * ( sorted_data().size() - 1 ) ) ] ); }
/*
 * Six values inserted in mixed order must come back from sorted_data()
 * in ascending order.
 * TEST_IGNORE() is invoked first, so the test is currently reported as
 * ignored by the test framework.
 */
void test_sorted_data_can_sort_complex_tree(void)
{
    TEST_IGNORE();

    int input[] = { 2, 1, 3, 6, 7, 5 };
    int want[] = { 1, 2, 3, 5, 6, 7 };

    node_t *root = build_tree(input, ARRAY_SIZE(input));
    int *got = sorted_data(root);

    TEST_ASSERT_EQUAL_INT_ARRAY(want, got, ARRAY_SIZE(want));

    /* Release the tree and the array handed back by sorted_data(). */
    free_tree(root);
    free(got);
}
/*
 * Two values where the second is larger than the first: sorted_data()
 * must return them in the same (already ascending) order.
 * TEST_IGNORE() is invoked first, so the test is currently reported as
 * ignored by the test framework.
 */
void test_sorted_data_can_sort_if_second_number_is_greater_than_first(void)
{
    TEST_IGNORE();

    int input[] = { 2, 3 };
    int want[] = { 2, 3 };

    node_t *root = build_tree(input, ARRAY_SIZE(input));
    int *got = sorted_data(root);

    TEST_ASSERT_EQUAL_INT_ARRAY(want, got, ARRAY_SIZE(want));

    /* Release the tree and the array handed back by sorted_data(). */
    free_tree(root);
    free(got);
}
/*
 * A tree built from a single value: sorted_data() must return an array
 * holding just that value.
 * TEST_IGNORE() is invoked first, so the test is currently reported as
 * ignored by the test framework.
 */
void test_sorted_data_can_sort_single_number(void)
{
    TEST_IGNORE();

    int input[] = { 2 };
    int want[] = { 2 };

    node_t *root = build_tree(input, ARRAY_SIZE(input));
    int *got = sorted_data(root);

    TEST_ASSERT_EQUAL_INT_ARRAY(want, got, ARRAY_SIZE(want));

    /* Release the tree and the array handed back by sorted_data(). */
    free_tree(root);
    free(got);
}
int Stump::train(const Mat& data, const Mat& labels, const Mat& weights) { CV_Assert(labels.rows == 1 && labels.cols == data.cols); CV_Assert(weights.rows == 1 && weights.cols == data.cols); /* Assert that data and labels have int type */ /* Assert that weights have float type */ /* Prepare labels for each feature rearranged according to sorted order */ Mat sorted_labels(data.rows, data.cols, labels.type()); Mat sorted_weights(data.rows, data.cols, weights.type()); Mat indices; sortIdx(data, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); for( int row = 0; row < indices.rows; ++row ) { for( int col = 0; col < indices.cols; ++col ) { sorted_labels.at<int>(row, col) = labels.at<int>(0, indices.at<int>(row, col)); sorted_weights.at<float>(row, col) = weights.at<float>(0, indices.at<int>(row, col)); } } /* Sort feature values */ Mat sorted_data(data.rows, data.cols, data.type()); sort(data, sorted_data, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); /* Split positive and negative weights */ Mat pos_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols, sorted_weights.type()); Mat neg_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols, sorted_weights.type()); for( int row = 0; row < data.rows; ++row ) { for( int col = 0; col < data.cols; ++col ) { if( sorted_labels.at<int>(row, col) == +1 ) { pos_weights.at<float>(row, col) = sorted_weights.at<float>(row, col); } else { neg_weights.at<float>(row, col) = sorted_weights.at<float>(row, col); } } } /* Compute cumulative sums for fast stump error computation */ Mat pos_cum_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols, sorted_weights.type()); Mat neg_cum_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols, sorted_weights.type()); cumsum(pos_weights, pos_cum_weights); cumsum(neg_weights, neg_cum_weights); /* Compute total weights of positive and negative samples */ float pos_total_weight = pos_cum_weights.at<float>(0, weights.cols - 1); float neg_total_weight = 
neg_cum_weights.at<float>(0, weights.cols - 1); float eps = 1.0f / (4 * labels.cols); /* Compute minimal error */ float min_err = FLT_MAX; int min_row = -1; int min_col = -1; int min_polarity = 0; float min_pos_value = 1, min_neg_value = -1; for( int row = 0; row < sorted_weights.rows; ++row ) { for( int col = 0; col < sorted_weights.cols - 1; ++col ) { float err, h_pos, h_neg; // Direct polarity float pos_wrong = pos_cum_weights.at<float>(row, col); float pos_right = pos_total_weight - pos_wrong; float neg_right = neg_cum_weights.at<float>(row, col); float neg_wrong = neg_total_weight - neg_right; h_pos = (float)(.5 * log((pos_right + eps) / (pos_wrong + eps))); h_neg = (float)(.5 * log((neg_wrong + eps) / (neg_right + eps))); err = sqrt(pos_right * neg_wrong) + sqrt(pos_wrong * neg_right); if( err < min_err ) { min_err = err; min_row = row; min_col = col; min_polarity = +1; min_pos_value = h_pos; min_neg_value = h_neg; } // Opposite polarity swap(pos_right, pos_wrong); swap(neg_right, neg_wrong); h_pos = -h_pos; h_neg = -h_neg; err = sqrt(pos_right * neg_wrong) + sqrt(pos_wrong * neg_right); if( err < min_err ) { min_err = err; min_row = row; min_col = col; min_polarity = -1; min_pos_value = h_pos; min_neg_value = h_neg; } } } /* Compute threshold, store found values in fields */ threshold_ = ( sorted_data.at<int>(min_row, min_col) + sorted_data.at<int>(min_row, min_col + 1) ) / 2; polarity_ = min_polarity; pos_value_ = min_pos_value; neg_value_ = min_neg_value; return min_row; }