/**
 * Trains (or reloads) a boosted classifier on "char_datasetNM2.csv", prints
 * the train/test error, classifies two hard-coded samples and saves the model
 * to "./trained_classifierNM2.xml".
 *
 * @return EXIT_SUCCESS on completion, EXIT_FAILURE if the dataset cannot be read.
 */
int main(int argc, char** argv) {

    // Read the data from the CSV file. read_csv() returns non-zero on failure;
    // continuing with an empty dataset would only fail later in train/calc_error.
    CvMLData cvml;
    if (cvml.read_csv("char_datasetNM2.csv") != 0)
    {
        fprintf(stderr, "Could not read char_datasetNM2.csv\n");
        return EXIT_FAILURE;
    }
    // Column 0 holds the response (class label)
    cvml.set_response_idx(0);

    // Use 80% of the samples for training (second argument: mix the samples)
    CvTrainTestSplit cvtts(0.8f, true);
    // Assign the division to the data
    cvml.set_train_test_split(&cvtts);

    CvBoost boost;

    ifstream ifile("./trained_classifierNM2.xml");
    if (ifile)
    {
        // The file exists, so we don't want to train
        printf("Found trained_classifierNM2.xml file, remove it if you want to retrain with new data ... \n");
        boost.load("./trained_classifierNM2.xml", "boost");
    }
    else
    {
        // Real AdaBoost; second parameter (100) is the weak classifier count
        printf("Training ... \n");
        boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false);
    }

    // Calculate the train and test errors (per-sample predictions are collected but not used)
    std::vector<float> train_responses, test_responses;
    float fl1 = boost.calc_error(&cvml, CV_TRAIN_ERROR, &train_responses);
    float fl2 = boost.calc_error(&cvml, CV_TEST_ERROR, &test_responses);
    printf("Error train %f \n", fl1);
    printf("Error test %f \n", fl2);

    // Try a char.
    // NOTE(review): the leading 0 presumably stands in for the response column (index 0) — confirm.
    static const float arr[] = {0,0.870690,0.096485,2.000000,2.000000,0.137080,1.269940,2.000000};
    vector<float> sample(arr, arr + sizeof(arr) / sizeof(arr[0]));
    // Same sample twice: last argument false -> predicted value, true -> value printed as "votes"
    float prediction = boost.predict(Mat(sample), Mat(), Range::all(), false, false);
    float votes      = boost.predict(Mat(sample), Mat(), Range::all(), false, true);

    printf("\n The char sample is predicted as: %f (with number of votes = %f)\n", prediction, votes);
    printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n", 1.0f - 1.0f / (1 + exp(-2 * votes)));

    // Try a NONchar
    //static const float arr2[] = {0,1.500000,0.072162,0.000000,8.000000,0.188095,1.578947,16.000000};
    static const float arr2[] = {0,0.565217,0.103749,1.000000,2.000000,0.032258,1.525692,10.000000};
    vector<float> sample2(arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]));
    prediction = boost.predict(Mat(sample2), Mat(), Range::all(), false, false);
    votes      = boost.predict(Mat(sample2), Mat(), Range::all(), false, true);

    printf("\n The non_char sample is predicted as: %f (with number of votes = %f)\n", prediction, votes);
    printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n\n", 1.0f - 1.0f / (1 + exp(-2 * votes)));

    // Save the classifier (also rewrites the file when it was just loaded)
    boost.save("./trained_classifierNM2.xml", "boost");

    return EXIT_SUCCESS;
}
Example #2
0
/**
 * Loads a CSV file through CvMLData and returns its values converted to
 * the requested element type. Prints the file name, the matrix size and a
 * preview of at most the first three rows.
 *
 * Terminates the process with exit(-1) when the file cannot be read.
 *
 * @param filename  path of the CSV file to load
 * @param datatype  target type passed to cv::Mat::convertTo
 * @return the converted data matrix
 */
cv::Mat readCSV(const string & filename, int datatype)
{
    // read CSV data (read_csv returns non-zero on failure)
    CvMLData mlData;
    if (mlData.read_csv(filename.c_str()) != 0)
    {
        cerr << "error: failed to load " << filename << endl;
        exit(-1);
    }

    // convert data to matrix
    cv::Mat values(mlData.get_values());
    cv::Mat data;
    values.convertTo(data, datatype);

    // display info
    cout << "filename: " << filename << endl;
    cout << "size: " << data.rows << " " << data.cols << endl;

    // display first lines; clamp the preview so files with fewer than
    // three rows do not produce an out-of-range ROI
    const int previewRows = data.rows < 3 ? data.rows : 3;
    if (previewRows > 0)
    {
        cv::Mat subData(data, cv::Rect(0, 0, data.cols, previewRows));
        displayMat(subData);
    }
    cout << "  ..." << endl;

    return data;
}
Example #3
0
/**
 * Generates sample data, trains a boosted classifier on "samples.csv",
 * prints the train/test error and saves the model to "./trained_boost.xml".
 *
 * @return EXIT_SUCCESS on completion, EXIT_FAILURE if the dataset cannot be read.
 */
int main(int argc, char** argv) {

	generateData();

	/* STEP 2. Opening the file */
	//1. Declare a structure to keep the data
	CvMLData cvml;
	//2. Read the file; read_csv() returns non-zero on failure, and training
	//   on an unloaded dataset cannot succeed
	if (cvml.read_csv("samples.csv") != 0)
	{
		fprintf(stderr, "Could not read samples.csv\n");
		return EXIT_FAILURE;
	}
	//3. Indicate which column is the response
	cvml.set_response_idx(0);

	/* STEP 3. Splitting the samples */
	//1. Select 15 samples for the training (second argument: mix the samples)
	CvTrainTestSplit cvtts(15, true);
	//2. Assign the division to the data
	cvml.set_train_test_split(&cvtts);

	printf("Training ... ");
	/* STEP 4. The training */
	//1. Declare the classifier
	CvBoost boost;
	//2. Train it (Real AdaBoost; second parameter 100 is the weak classifier count)
	boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false);

	/* STEP 5. Calculating the testing and training error */
	// 1. Declare a couple of vectors to save the predictions of each sample
	vector<float> train_responses;
	vector<float> test_responses;
	// 2. Calculate the training error
	float fl1 = boost.calc_error(&cvml, CV_TRAIN_ERROR, &train_responses);
	// 3. Calculate the test error
	float fl2 = boost.calc_error(&cvml, CV_TEST_ERROR, &test_responses);
	printf("Error train %f \n", fl1);
	printf("Error test %f \n", fl2);

	/* STEP 6. Save your classifier */
	// Save the trained classifier
	boost.save("./trained_boost.xml", "boost");

	return EXIT_SUCCESS;
}
Example #4
0
/**
 * Trains several tree-based models (decision tree, optionally boosting,
 * random trees, extremely randomized trees) on one dataset and prints the
 * errors and variable importance of each through print_result().
 *
 * Define LEPIOTA to use the agaricus-lepiota dataset (response in column 0)
 * instead of the waveform dataset (categorical response in column 21).
 *
 * @return 0 on completion, 1 if the dataset cannot be read.
 */
int main()
{
    const int train_sample_count = 300;

//#define LEPIOTA
#ifdef LEPIOTA
    const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data";
#else
    const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data";
#endif

    CvDTree dtree;
    CvBoost boost;
    CvRTrees rtrees;
    CvERTrees ertrees;

    CvMLData data;

    // The first train_sample_count samples requested for training
    CvTrainTestSplit spl( train_sample_count );

    // read_csv() returns non-zero on failure; none of the models below can
    // be trained without data, so stop here with an explicit message
    if( data.read_csv( filename ) != 0 )
    {
        fprintf( stderr, "couldn't read %s\n", filename );
        return 1;
    }

#ifdef LEPIOTA
    data.set_response_idx( 0 );
#else
    data.set_response_idx( 21 );
    data.change_var_type( 21, CV_VAR_CATEGORICAL );
#endif

    data.set_train_test_split( &spl );

    printf("======DTREE=====\n");
    dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() );

    // Boosting is only run for the LEPIOTA dataset
#ifdef LEPIOTA
    printf("======BOOST=====\n");
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 );
#endif

    printf("======RTREES=====\n");
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() );

    return 0;
}
/**
 * Trains (or reloads) a boosted classifier on "groups_dataset.csv", prints
 * the train/test error, classifies two hard-coded samples and saves the model
 * to "./trained_boost_groups.xml".
 *
 * @return EXIT_SUCCESS on completion, EXIT_FAILURE if the dataset cannot be read.
 */
int main(int argc, char** argv) {

    /* STEP 2. Opening the file */
    //1. Declare a structure to keep the data
    CvMLData cvml;

    //2. Read the file; read_csv() returns non-zero on failure, and the error
    //   computation below needs the data even when the model is loaded from disk
    if (cvml.read_csv("groups_dataset.csv") != 0)
    {
        fprintf(stderr, "Could not read groups_dataset.csv\n");
        return EXIT_FAILURE;
    }
    //cvml.read_csv("strokes_dataset_noresized.csv");

    //3. Indicate which column is the response
    cvml.set_response_idx(0);

    /* STEP 3. Splitting the samples */
    //1. Select 90% for the training (an integer value is also allowed here)
    CvTrainTestSplit cvtts(0.9f, true);
    //2. Assign the division to the data
    cvml.set_train_test_split(&cvtts);

    /* STEP 4. The training */
    //1. Declare the classifier
    CvBoost boost;

    ifstream ifile("./trained_boost_groups.xml");
    if (ifile)
    {
        // The file exists, so we don't need to train
        boost.load("./trained_boost_groups.xml", "boost");
    }
    else
    {
        //2. Train it (Real AdaBoost; second parameter 500 is the weak classifier count)
        printf("Training ... \n");
        boost.train(&cvml, CvBoostParams(CvBoost::REAL, 500, 0, 1, false, 0), false);
    }

    /* STEP 5. Calculating the testing and training error */
    // 1. Declare a couple of vectors to save the predictions of each sample
    std::vector<float> train_responses, test_responses;
    // 2. Calculate the training error
    float fl1 = boost.calc_error(&cvml, CV_TRAIN_ERROR, &train_responses);
    // 3. Calculate the test error
    float fl2 = boost.calc_error(&cvml, CV_TEST_ERROR, &test_responses);
    printf("Error train %f \n", fl1);
    printf("Error test %f \n", fl2);

    // First hard-coded sample.
    // NOTE(review): the leading 0 presumably stands in for the response column (index 0) — confirm.
    static const float arr[] = {0,-1.980394,1.249858,-0.631116,2.819193,0.305448,0.108346,0.801116,0.104873,0.130908,0.559806,0.255053,0.455610,0.294118,0.455645,1.549193,0.087770,0.144896,1.650866};
    vector<float> sample(arr, arr + sizeof(arr) / sizeof(arr[0]));
    // Same sample twice: last argument false -> predicted value, true -> value printed as "votes"
    float prediction = boost.predict(Mat(sample), Mat(), Range::all(), false, false);
    float votes      = boost.predict(Mat(sample), Mat(), Range::all(), false, true);

    printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction, votes);

    // Second hard-coded sample
    //static const float arr2[] = {0,0.911369,1.052156,1.154478,3.321924,0.829768,0.249785,0.616930,0.246637,0.399782,0.337159,0.103893,0.308142,0.666667,0.745356,1.118034,0.009747,0.011016,1.130162};
    static const float arr2[] = {0,1.14335,3.00412,2.62747,3.26428,2.32749,0.713018,0.47244,0.289846,0.613508,0.40514,0.216716,0.53305,0.878788,3.21698,3.6607,0.0422318,0.114392,2.70868};
    vector<float> sample2(arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]));
    float prediction2 = boost.predict(Mat(sample2), Mat(), Range::all(), false, false);
    float votes2      = boost.predict(Mat(sample2), Mat(), Range::all(), false, true);

    printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction2, votes2);

    /* STEP 6. Save your classifier */
    // Save the classifier (also rewrites the file when it was just loaded)
    boost.save("./trained_boost_groups.xml", "boost");

    return EXIT_SUCCESS;
}
Example #6
0
int CV_GBTreesTest::TestTrainPredict(int test_num)
{
    int code = cvtest::TS::OK;

    int weak_count = 200;
    float shrinkage = 0.1f;
    float subsample_portion = 0.5f;
    int max_depth = 5;
    bool use_surrogates = false;
    int loss_function_type = 0;
    switch (test_num)
    {
        case (1) : loss_function_type = CvGBTrees::SQUARED_LOSS; break;
        case (2) : loss_function_type = CvGBTrees::ABSOLUTE_LOSS; break;
        case (3) : loss_function_type = CvGBTrees::HUBER_LOSS; break;
        case (0) : loss_function_type = CvGBTrees::DEVIANCE_LOSS; break;
        default  :
            {
            ts->printf( cvtest::TS::LOG, "Bad test_num value in CV_GBTreesTest::TestTrainPredict(..) function." );
            return cvtest::TS::FAIL_BAD_ARG_CHECK;
            }
    }

    int dataset_num = test_num == 0 ? 0 : 1;
    if (!data)
    {
        data = new CvMLData();
        data->set_delimiter(',');

        if (data->read_csv(datasets[dataset_num].c_str()))
        {
            ts->printf( cvtest::TS::LOG, "File reading error." );
            return cvtest::TS::FAIL_INVALID_TEST_DATA;
        }

        if (test_num == 0)
        {
            data->set_response_idx(57);
            data->set_var_types("ord[0-56],cat[57]");
        }
        else
        {
            data->set_response_idx(13);
            data->set_var_types("ord[0-2,4-13],cat[3]");
            subsample_portion = 0.7f;
        }

        int train_sample_count = cvFloor(_get_len(data->get_responses())*0.5f);
        CvTrainTestSplit spl( train_sample_count );
        data->set_train_test_split( &spl );
    }

    data->mix_train_and_test_idx();


    if (gtb) delete gtb;
    gtb = new CvGBTrees();
    bool tmp_code = true;
    tmp_code = gtb->train(data, CvGBTreesParams(loss_function_type, weak_count,
                          shrinkage, subsample_portion,
                          max_depth, use_surrogates));

    if (!tmp_code)
    {
        ts->printf( cvtest::TS::LOG, "Model training was failed.");
        return cvtest::TS::FAIL_INVALID_OUTPUT;
    }

    code = checkPredictError(test_num);

    return code;

}
Example #7
0
int main() {
    // load codebook
    cout << "load codebook ... ..." << endl;
    string cbpath = "data/cluster/clst.npy";
    CvMLData mlData;
    mlData.read_csv(cbpath.c_str());
    Mat codebook(mlData.get_values());

    // load flann
    cout << "build flann ... ... " << endl;
    string flannpath = "data/cache/flann.index";
    cv::flann::IndexParams *indexParams;
    cv::flann::Index *flannIndex;
    if (exists(flannpath)) {
        indexParams = new cv::flann::SavedIndexParams(flannpath);
        flannIndex = new cv::flann::Index(codebook, *indexParams);
    }
    else {
        indexParams = new cv::flann::AutotunedIndexParams(); 
        flannIndex = new cv::flann::Index(codebook, *indexParams);
        flannIndex->save(flannpath);
    }

    // create the inverted index
    vector<string> siftpaths = readlines("data/featlist");
    string savepath = "data/cache/ivindex.txt";
    if (!exists(savepath)) {
        cerr << "index file doesn't exist ... ..." << endl;
        exit(-1);
    }
    IvIndex ivindex(savepath, codebook, siftpaths.size());

    // prepare the context and sockets
    zmq::context_t context(1);
    zmq::socket_t socket(context, ZMQ_REP);
    socket.bind("tcp://*:5555");

    cout << "start to receive request ... ..." << endl;
    // query 
    while (true) {
        // Wait for next request from client
        string recvStr = s_recv(socket);
        // ???? transform recvStr ukbench00000.th.jpg to data/Images/ukbench00000.jpg.sift
        char path[128];
        sprintf(path, "data/Images/ukbench%s.jpg.sift", recvStr.substr(7, 5).c_str());
        cout << "process request: " << recvStr << endl;
        vector<size_t> ret = ivindex.score(path, 15, flannIndex);

        sleep(1);
        // Send reply back to client
        string reply;
        for (auto x: ret) {
            reply += to_string(x) + " ";
        }
        s_send(socket, reply);
    }

    delete indexParams;
    indexParams = nullptr;
    delete flannIndex;
    flannIndex = nullptr;

    return 0;
}
Example #8
0
/**
 * Compares several OpenCV classifiers on "data/waveform.data".
 *
 * Column 21 is the (categorical) response and 300 samples are requested for
 * training. The program then trains and reports errors for: decision tree,
 * random trees, extremely randomized trees, gradient boosted trees, k-nearest
 * neighbours, RBF and linear SVMs, and a neural network. The boosting and
 * polynomial SVM sections are compiled out with #if 0.
 *
 * @return 0 on completion; exits with EXIT_FAILURE if the dataset cannot be read.
 */
int main()
{
    const int train_sample_count = 300;
    bool is_regression = false;

    const char* filename = "data/waveform.data";
    int response_idx = 21;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    if(data.read_csv(filename) != 0)
    {
        printf("couldn't read %s\n", filename);
        // A missing dataset is an error, so report a failing exit status
        exit(EXIT_FAILURE);
    }

    data.set_response_idx(response_idx);
    data.change_var_type(response_idx, CV_VAR_CATEGORICAL);
    data.set_train_test_split( &spl );

    // Raw views of the dataset, shared by the classifiers that do not take CvMLData directly
    const CvMat* values = data.get_values();
    const CvMat* response = data.get_responses();
    const CvMat* missing = data.get_missing();
    const CvMat* var_types = data.get_var_types();
    const CvMat* train_sidx = data.get_train_sample_idx();
    const CvMat* var_idx = data.get_var_idx();
    CvMat*response_map;
    // NOTE(review): ordered_response and response_map are never released in this function
    CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL);
    // The class count is taken from the width of the response map
    int num_classes = response_map->cols;
    
    CvDTree dtree;
    printf("======DTREE=====\n");
    CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0);
    dtree.train( &data, cvd_params);
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );

#if 0
    /* boosted trees are only implemented for two classes */
    printf("======BOOST=====\n");
    CvBoost boost;
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 );
#endif

    printf("======RTREES=====\n");
    CvRTrees rtrees;
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    CvERTrees ertrees;
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );

    printf("======GBTREES=====\n");
    CvGBTrees gbtrees;
    CvGBTreesParams gbparams;
    gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression
    gbtrees.train( &data, gbparams);
    
    //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx);
    print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0);

    printf("======KNEAREST=====\n");
    CvKNearest knearest;
    //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
    //                const Mat& _sample_idx, bool _is_regression,
    //                int _max_k, bool _update_base )
    // Sanity check: the last var_types entry must be marked categorical
    bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
    assert(is_classifier);
    int max_k = 10;
    knearest.train(values, response, train_sidx, is_regression, max_k, false);

    // One predicted response per sample.
    // NOTE(review): new_response is never released in this function
    CvMat* new_response = cvCreateMat(response->rows, 1, values->type);
    //print_types();

    //const CvMat* train_sidx = data.get_train_sample_idx();
    knearest.find_nearest(values, max_k, new_response, 0, 0, 0);

    print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR),
                 knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0);

    printf("======== RBF SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm1;
    CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm1.train(values, response, train_sidx, var_idx, params1);
    svm1.train_auto(values, response, var_idx, train_sidx, params1);
    svm_print_error(&svm1, values, response, response_idx, train_sidx);

    printf("======== Linear SVM =======\n");
    CvMySVM svm2;
    CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm2.train(values, response, train_sidx, var_idx, params2);
    svm2.train_auto(values, response, var_idx, train_sidx, params2);
    svm_print_error(&svm2, values, response, response_idx, train_sidx);

    printf("======NEURONAL NETWORK=====\n");

    // Three layers: one input per non-response column, then num_classes, then num_classes.
    // NOTE(review): the buffer is sized with sizeof(double) although the matrix
    // is CV_32SC1, and it is never freed — harmless here, but worth confirming.
    int num_layers = 3;
    CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1));
    cvmSetI(&layers, 0, 0, values->cols-1);
    cvmSetI(&layers, 0, 1, num_classes);
    cvmSetI(&layers, 0, 2, num_classes);
    CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0);
    CvANN_MLP_TrainParams ann_params;
    //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP;
    CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes);

    // The network is trained on the values with the response column removed
    CvMat values2 = cvmat_remove_column(values, response_idx);
    ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000);
    //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000);

    ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx);

#if 0 /* slow */

    printf("======== Polygonal SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm3;
    CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY,
                                     /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm3.train(values, response, train_sidx, var_idx, params3);
    svm3.train_auto(values, response, var_idx, train_sidx, params3);
    svm_print_error(&svm3, values, response, response_idx, train_sidx);
#endif

    return 0;
}
/**
 * Trains (or reloads) a boosted classifier on "char_dataset.csv", prints the
 * train/test error, classifies two hard-coded samples and saves the model to
 * "./trained_boost_char.xml".
 *
 * @return EXIT_SUCCESS on completion, EXIT_FAILURE if the dataset cannot be read.
 */
int main(int argc, char** argv) {

    /* STEP 2. Opening the file */
    //1. Declare a structure to keep the data
    CvMLData cvml;

    //2. Read the file; read_csv() returns non-zero on failure, and the error
    //   computation below needs the data even when the model is loaded from disk
    if (cvml.read_csv("char_dataset.csv") != 0)
    {
        fprintf(stderr, "Could not read char_dataset.csv\n");
        return EXIT_FAILURE;
    }
    //cvml.read_csv("strokes_dataset_noresized.csv");

    //3. Indicate which column is the response
    cvml.set_response_idx(0);

    /* STEP 3. Splitting the samples */
    //1. Select 90% for the training (an integer value is also allowed here)
    CvTrainTestSplit cvtts(0.9f, true);
    //2. Assign the division to the data
    cvml.set_train_test_split(&cvtts);

    /* STEP 4. The training */
    //1. Declare the classifier
    CvBoost boost;

    ifstream ifile("./trained_boost_char.xml");
    if (ifile)
    {
        // The file exists, so we don't need to train
        boost.load("./trained_boost_char.xml", "boost");
    }
    else
    {
        //2. Train it (Real AdaBoost; second parameter 2 is the weak classifier count)
        printf("Training ... \n");
        boost.train(&cvml, CvBoostParams(CvBoost::REAL, 2, 0, 1, false, 0), false);
    }
    cout<<"after train"<<endl;

    /* STEP 5. Calculating the testing and training error */
    // 1. Declare a couple of vectors to save the predictions of each sample
    std::vector<float> train_responses, test_responses;
    // 2. Calculate the training error
    float fl1 = boost.calc_error(&cvml, CV_TRAIN_ERROR, &train_responses);
    // 3. Calculate the test error
    float fl2 = boost.calc_error(&cvml, CV_TEST_ERROR, &test_responses);
    printf("Error train %f \n", fl1);
    printf("Error test %f \n", fl2);

    // Try a char.
    // NOTE(review): the leading 0 presumably stands in for the response column (index 0) — confirm.
    static const float arr[] = {0,1.659899,0.684169,0.412175,150.000000,81.000000,0.540000,0.358025,0.151203,0.000000,0.000000};

    vector<float> sample(arr, arr + sizeof(arr) / sizeof(arr[0]));
    // Same sample twice: last argument false -> predicted value, true -> value printed as "votes"
    float prediction = boost.predict(Mat(sample), Mat(), Range::all(), false, false);
    float votes      = boost.predict(Mat(sample), Mat(), Range::all(), false, true);

    printf("\n The sample (360) is predicted as: %f (with number of votes = %f)\n", prediction, votes);

    // Try a NONchar
    static const float arr2[] = {0,1.250000,0.433013,0.346410,9.000000,8.000000,0.888889,0.833333,0.375000,0.000000,0.000000};

    vector<float> sample2(arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]));
    prediction = boost.predict(Mat(sample2), Mat(), Range::all(), false, false);
    votes      = boost.predict(Mat(sample2), Mat(), Range::all(), false, true);

    printf("\n The sample (367) is predicted as: %f (with number of votes = %f)\n", prediction, votes);

    /* STEP 6. Save your classifier */
    // Save the classifier (also rewrites the file when it was just loaded)
    boost.save("./trained_boost_char.xml", "boost");

    return EXIT_SUCCESS;
}