Example #1
0
    // Look up the sample located at fraction `x` of the sorted data.
    //
    // `x` must lie in [0, 1] (checked with assert). Returns 0 when the
    // container is in simple mode or holds no data, and NaN when the data
    // has not been sorted yet. Otherwise the fractional rank is truncated
    // to an index; no interpolation between neighbouring samples is done.
    value_t percentile( double x ) const
    {
        assert( x >= 0 && x <= 1.0 );

        // Simple mode and an empty data set both report 0.
        if ( simple || data().empty() )
            return 0;

        // The lookup below is only meaningful on sorted data.
        if ( !is_sorted )
            return base_t::nan();

        // Scale x onto [0, size - 1] and truncate towards zero.
        // Should be improved to use linear interpolation
        const double scaled_rank = x * ( sorted_data().size() - 1 );
        const int index = static_cast<int>( scaled_rank );

        return *( sorted_data()[ index ] );
    }
/*
 * sorted_data() on a tree built from { 2, 1, 3, 6, 7, 5 } is expected to
 * produce the same six values in ascending order.
 * The test is currently marked ignored via TEST_IGNORE().
 */
void test_sorted_data_can_sort_complex_tree(void)
{
   TEST_IGNORE();

   int input[] = { 2, 1, 3, 6, 7, 5 };
   int want[] = { 1, 2, 3, 5, 6, 7 };

   node_t *root = build_tree(input, ARRAY_SIZE(input));
   int *got = sorted_data(root);

   TEST_ASSERT_EQUAL_INT_ARRAY(want, got, ARRAY_SIZE(want));

   /* Release the tree and the array handed back by sorted_data(). */
   free_tree(root);
   free(got);
}
/*
 * sorted_data() on a tree built from { 2, 3 } (second value larger than the
 * first) is expected to produce { 2, 3 }.
 * The test is currently marked ignored via TEST_IGNORE().
 */
void test_sorted_data_can_sort_if_second_number_is_greater_than_first(void)
{
   TEST_IGNORE();

   int input[] = { 2, 3 };
   int want[] = { 2, 3 };

   node_t *root = build_tree(input, ARRAY_SIZE(input));
   int *got = sorted_data(root);

   TEST_ASSERT_EQUAL_INT_ARRAY(want, got, ARRAY_SIZE(want));

   /* Release the tree and the array handed back by sorted_data(). */
   free_tree(root);
   free(got);
}
/*
 * sorted_data() on a tree built from the single value { 2 } is expected to
 * produce { 2 }.
 * The test is currently marked ignored via TEST_IGNORE().
 */
void test_sorted_data_can_sort_single_number(void)
{
   TEST_IGNORE();

   int input[] = { 2 };
   int want[] = { 2 };

   node_t *root = build_tree(input, ARRAY_SIZE(input));
   int *got = sorted_data(root);

   TEST_ASSERT_EQUAL_INT_ARRAY(want, got, ARRAY_SIZE(want));

   /* Release the tree and the array handed back by sorted_data(). */
   free_tree(root);
   free(got);
}
Example #5
0
int Stump::train(const Mat& data, const Mat& labels, const Mat& weights)
{
    CV_Assert(labels.rows == 1 && labels.cols == data.cols);
    CV_Assert(weights.rows == 1 && weights.cols == data.cols);
    /* Assert that data and labels have int type */
    /* Assert that weights have float type */


    /* Prepare labels for each feature rearranged according to sorted order */
    Mat sorted_labels(data.rows, data.cols, labels.type());
    Mat sorted_weights(data.rows, data.cols, weights.type());
    Mat indices;
    sortIdx(data, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);
    for( int row = 0; row < indices.rows; ++row )
    {
        for( int col = 0; col < indices.cols; ++col )
        {
            sorted_labels.at<int>(row, col) =
                labels.at<int>(0, indices.at<int>(row, col));
            sorted_weights.at<float>(row, col) =
                weights.at<float>(0, indices.at<int>(row, col));
        }
    }

    /* Sort feature values */
    Mat sorted_data(data.rows, data.cols, data.type());
    sort(data, sorted_data, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING);

    /* Split positive and negative weights */
    Mat pos_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols,
        sorted_weights.type());
    Mat neg_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols,
        sorted_weights.type());
    for( int row = 0; row < data.rows; ++row )
    {
        for( int col = 0; col < data.cols; ++col )
        {
            if( sorted_labels.at<int>(row, col) == +1 )
            {
                pos_weights.at<float>(row, col) =
                    sorted_weights.at<float>(row, col);
            }
            else
            {
                neg_weights.at<float>(row, col) =
                    sorted_weights.at<float>(row, col);
            }
        }
    }

    /* Compute cumulative sums for fast stump error computation */
    Mat pos_cum_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols,
        sorted_weights.type());
    Mat neg_cum_weights = Mat::zeros(sorted_weights.rows, sorted_weights.cols,
        sorted_weights.type());
    cumsum(pos_weights, pos_cum_weights);
    cumsum(neg_weights, neg_cum_weights);

    /* Compute total weights of positive and negative samples */
    float pos_total_weight = pos_cum_weights.at<float>(0, weights.cols - 1);
    float neg_total_weight = neg_cum_weights.at<float>(0, weights.cols - 1);


    float eps = 1.0f / (4 * labels.cols);

    /* Compute minimal error */
    float min_err = FLT_MAX;
    int min_row = -1;
    int min_col = -1;
    int min_polarity = 0;
    float min_pos_value = 1, min_neg_value = -1;

    for( int row = 0; row < sorted_weights.rows; ++row )
    {
        for( int col = 0; col < sorted_weights.cols - 1; ++col )
        {
            float err, h_pos, h_neg;

            // Direct polarity

            float pos_wrong = pos_cum_weights.at<float>(row, col);
            float pos_right = pos_total_weight - pos_wrong;

            float neg_right = neg_cum_weights.at<float>(row, col);
            float neg_wrong = neg_total_weight - neg_right;

            h_pos = (float)(.5 * log((pos_right + eps) / (pos_wrong + eps)));
            h_neg = (float)(.5 * log((neg_wrong + eps) / (neg_right + eps)));

            err = sqrt(pos_right * neg_wrong) + sqrt(pos_wrong * neg_right);

            if( err < min_err )
            {
                min_err = err;
                min_row = row;
                min_col = col;
                min_polarity = +1;
                min_pos_value = h_pos;
                min_neg_value = h_neg;
            }

            // Opposite polarity
            swap(pos_right, pos_wrong);
            swap(neg_right, neg_wrong);

            h_pos = -h_pos;
            h_neg = -h_neg;

            err = sqrt(pos_right * neg_wrong) + sqrt(pos_wrong * neg_right);


            if( err < min_err )
            {
                min_err = err;
                min_row = row;
                min_col = col;
                min_polarity = -1;
                min_pos_value = h_pos;
                min_neg_value = h_neg;
            }
        }
    }

    /* Compute threshold, store found values in fields */
    threshold_ = ( sorted_data.at<int>(min_row, min_col) +
                   sorted_data.at<int>(min_row, min_col + 1) ) / 2;
    polarity_ = min_polarity;
    pos_value_ = min_pos_value;
    neg_value_ = min_neg_value;

    return min_row;
}