int main(int argc, char** argv) { //Read the data from csv file CvMLData cvml; cvml.read_csv("char_datasetNM2.csv"); //Indicate which column is the response cvml.set_response_idx(0); //Select 50% for the training CvTrainTestSplit cvtts(0.8f, true); //Assign the division to the data cvml.set_train_test_split(&cvtts); CvBoost boost; ifstream ifile("./trained_classifierNM2.xml"); if (ifile) { //The file exists, so we don't want to train printf("Found trained_boost_char.xml file, remove it if you want to retrain with new data ... \n"); boost.load("./trained_classifierNM2.xml", "boost"); } else { //Train with 100 features printf("Training ... \n"); boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false); } //Calculate the test and train errors std::vector<float> train_responses, test_responses; float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); //Try a char static const float arr[] = {0,0.870690,0.096485,2.000000,2.000000,0.137080,1.269940,2.000000}; vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false ); float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true ); printf("\n The char sample is predicted as: %f (with number of votes = %f)\n", prediction,votes); printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n", (float)1-(float)1/(1+exp(-2*votes))); //Try a NONchar //static const float arr2[] = {0,1.500000,0.072162,0.000000,8.000000,0.188095,1.578947,16.000000}; static const float arr2[] = {0,0.565217,0.103749,1.000000,2.000000,0.032258,1.525692,10.000000}; vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false ); votes = boost.predict( Mat(sample2), Mat(), Range::all(), 
false, true ); printf("\n The non_char sample is predicted as: %f (with number of votes = %f)\n", prediction,votes); printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n\n", (float)1-(float)1/(1+exp(-2*votes))); // Save the trained classifier boost.save("./trained_classifierNM2.xml", "boost"); return EXIT_SUCCESS; }
/**
 * Loads a CSV file and returns its values as a matrix of the requested type.
 *
 * The file is parsed with CvMLData, converted to `datatype`, and a short
 * summary (file name, size, first rows) is written to stdout.
 *
 * @param filename  path of the CSV file to read
 * @param datatype  OpenCV element type passed to cv::Mat::convertTo
 * @return the converted data matrix
 *
 * Terminates the process with exit(-1) when the file cannot be read.
 */
cv::Mat readCSV(const string & filename, int datatype)
{
    // read CSV data
    CvMLData mlData;
    int err = mlData.read_csv(filename.c_str());
    if (err != 0)
    {
        cerr << "error: failed to load " << filename << endl;
        exit(-1);
    }

    // convert data to matrix
    cv::Mat values(mlData.get_values());
    cv::Mat data;
    values.convertTo(data, datatype);

    // display info
    cout << "filename: " << filename << endl;
    cout << "size: " << data.rows << " " << data.cols << endl;

    // display first lines: at most 3, fewer when the file is shorter, so the
    // preview rectangle never extends past the end of the matrix
    const int previewRows = data.rows < 3 ? data.rows : 3;
    if (previewRows > 0 && data.cols > 0)
    {
        cv::Mat subData(data, cv::Rect(0, 0, data.cols, previewRows));
        displayMat(subData);
    }
    cout << " ..." << endl;

    return data;
}
int main(int argc, char** argv) { generateData(); /* STEP 2. Opening the file */ //1. Declare a structure to keep the data CvMLData cvml; //2. Read the file cvml.read_csv("samples.csv"); //3. Indicate which column is the response cvml.set_response_idx(0); /* STEP 3. Splitting the samples */ //1. Select 40 for the training CvTrainTestSplit cvtts(15, true); //2. Assign the division to the data cvml.set_train_test_split(&cvtts); printf("Training ... "); /* STEP 4. The training */ //1. Declare the classifier CvBoost boost; //2. Train it with 100 features boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false); /* STEP 5. Calculating the testing and training error */ // 1. Declare a couple of vectors to save the predictions of each sample vector<float> train_responses; vector<float> test_responses; // 2. Calculate the training error float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); // 3. Calculate the test error float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); /* STEP 6. Save your classifier */ // Save the trained classifier boost.save("./trained_boost.xml", "boost"); return EXIT_SUCCESS; }
int main() { const int train_sample_count = 300; //#define LEPIOTA #ifdef LEPIOTA const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data"; #else const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data"; #endif CvDTree dtree; CvBoost boost; CvRTrees rtrees; CvERTrees ertrees; CvMLData data; CvTrainTestSplit spl( train_sample_count ); data.read_csv( filename ); #ifdef LEPIOTA data.set_response_idx( 0 ); #else data.set_response_idx( 21 ); data.change_var_type( 21, CV_VAR_CATEGORICAL ); #endif data.set_train_test_split( &spl ); printf("======DTREE=====\n"); dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 )); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() ); #ifdef LEPIOTA printf("======BOOST=====\n"); boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 ); #endif printf("======RTREES=====\n"); rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() ); return 0; }
int main(int argc, char** argv) { /* STEP 2. Opening the file */ //1. Declare a structure to keep the data CvMLData cvml; //2. Read the file cvml.read_csv("groups_dataset.csv"); //cvml.read_csv("strokes_dataset_noresized.csv"); //3. Indicate which column is the response cvml.set_response_idx(0); /* STEP 3. Splitting the samples */ //1. Select 50% for the training (an integer value is also allowed here) CvTrainTestSplit cvtts(0.9f, true); //2. Assign the division to the data cvml.set_train_test_split(&cvtts); /* STEP 4. The training */ //1. Declare the classifier CvBoost boost; ifstream ifile("./trained_boost_groups.xml"); if (ifile) { // The file exists, so we don't need to train boost.load("./trained_boost_groups.xml", "boost"); } else { //2. Train it with 100 features printf("Training ... \n"); boost.train(&cvml, CvBoostParams(CvBoost::REAL, 500, 0, 1, false, 0), false); } /* STEP 5. Calculating the testing and training error */ // 1. Declare a couple of vectors to save the predictions of each sample std::vector<float> train_responses, test_responses; // 2. Calculate the training error float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); // 3. 
Calculate the test error float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); static const float arr[] = {0,-1.980394,1.249858,-0.631116,2.819193,0.305448,0.108346,0.801116,0.104873,0.130908,0.559806,0.255053,0.455610,0.294118,0.455645,1.549193,0.087770,0.144896,1.650866}; vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false ); float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true ); printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction,votes); //static const float arr2[] = {0,0.911369,1.052156,1.154478,3.321924,0.829768,0.249785,0.616930,0.246637,0.399782,0.337159,0.103893,0.308142,0.666667,0.745356,1.118034,0.009747,0.011016,1.130162}; static const float arr2[] = {0,1.14335,3.00412,2.62747,3.26428,2.32749,0.713018,0.47244,0.289846,0.613508,0.40514,0.216716,0.53305,0.878788,3.21698,3.6607,0.0422318,0.114392,2.70868}; vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); float prediction2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, false ); float votes2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, true ); printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction2,votes2); /* STEP 6. Save your classifier */ // Save the trained classifier boost.save("./trained_boost_groups.xml", "boost"); return EXIT_SUCCESS; }
int CV_GBTreesTest::TestTrainPredict(int test_num) { int code = cvtest::TS::OK; int weak_count = 200; float shrinkage = 0.1f; float subsample_portion = 0.5f; int max_depth = 5; bool use_surrogates = false; int loss_function_type = 0; switch (test_num) { case (1) : loss_function_type = CvGBTrees::SQUARED_LOSS; break; case (2) : loss_function_type = CvGBTrees::ABSOLUTE_LOSS; break; case (3) : loss_function_type = CvGBTrees::HUBER_LOSS; break; case (0) : loss_function_type = CvGBTrees::DEVIANCE_LOSS; break; default : { ts->printf( cvtest::TS::LOG, "Bad test_num value in CV_GBTreesTest::TestTrainPredict(..) function." ); return cvtest::TS::FAIL_BAD_ARG_CHECK; } } int dataset_num = test_num == 0 ? 0 : 1; if (!data) { data = new CvMLData(); data->set_delimiter(','); if (data->read_csv(datasets[dataset_num].c_str())) { ts->printf( cvtest::TS::LOG, "File reading error." ); return cvtest::TS::FAIL_INVALID_TEST_DATA; } if (test_num == 0) { data->set_response_idx(57); data->set_var_types("ord[0-56],cat[57]"); } else { data->set_response_idx(13); data->set_var_types("ord[0-2,4-13],cat[3]"); subsample_portion = 0.7f; } int train_sample_count = cvFloor(_get_len(data->get_responses())*0.5f); CvTrainTestSplit spl( train_sample_count ); data->set_train_test_split( &spl ); } data->mix_train_and_test_idx(); if (gtb) delete gtb; gtb = new CvGBTrees(); bool tmp_code = true; tmp_code = gtb->train(data, CvGBTreesParams(loss_function_type, weak_count, shrinkage, subsample_portion, max_depth, use_surrogates)); if (!tmp_code) { ts->printf( cvtest::TS::LOG, "Model training was failed."); return cvtest::TS::FAIL_INVALID_OUTPUT; } code = checkPredictError(test_num); return code; }
int main() { // load codebook cout << "load codebook ... ..." << endl; string cbpath = "data/cluster/clst.npy"; CvMLData mlData; mlData.read_csv(cbpath.c_str()); Mat codebook(mlData.get_values()); // load flann cout << "build flann ... ... " << endl; string flannpath = "data/cache/flann.index"; cv::flann::IndexParams *indexParams; cv::flann::Index *flannIndex; if (exists(flannpath)) { indexParams = new cv::flann::SavedIndexParams(flannpath); flannIndex = new cv::flann::Index(codebook, *indexParams); } else { indexParams = new cv::flann::AutotunedIndexParams(); flannIndex = new cv::flann::Index(codebook, *indexParams); flannIndex->save(flannpath); } // create the inverted index vector<string> siftpaths = readlines("data/featlist"); string savepath = "data/cache/ivindex.txt"; if (!exists(savepath)) { cerr << "index file doesn't exist ... ..." << endl; exit(-1); } IvIndex ivindex(savepath, codebook, siftpaths.size()); // prepare the context and sockets zmq::context_t context(1); zmq::socket_t socket(context, ZMQ_REP); socket.bind("tcp://*:5555"); cout << "start to receive request ... ..." << endl; // query while (true) { // Wait for next request from client string recvStr = s_recv(socket); // ???? transform recvStr ukbench00000.th.jpg to data/Images/ukbench00000.jpg.sift char path[128]; sprintf(path, "data/Images/ukbench%s.jpg.sift", recvStr.substr(7, 5).c_str()); cout << "process request: " << recvStr << endl; vector<size_t> ret = ivindex.score(path, 15, flannIndex); sleep(1); // Send reply back to client string reply; for (auto x: ret) { reply += to_string(x) + " "; } s_send(socket, reply); } delete indexParams; indexParams = nullptr; delete flannIndex; flannIndex = nullptr; return 0; }
int main() { const int train_sample_count = 300; bool is_regression = false; const char* filename = "data/waveform.data"; int response_idx = 21; CvMLData data; CvTrainTestSplit spl( train_sample_count ); if(data.read_csv(filename) != 0) { printf("couldn't read %s\n", filename); exit(0); } data.set_response_idx(response_idx); data.change_var_type(response_idx, CV_VAR_CATEGORICAL); data.set_train_test_split( &spl ); const CvMat* values = data.get_values(); const CvMat* response = data.get_responses(); const CvMat* missing = data.get_missing(); const CvMat* var_types = data.get_var_types(); const CvMat* train_sidx = data.get_train_sample_idx(); const CvMat* var_idx = data.get_var_idx(); CvMat*response_map; CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL); int num_classes = response_map->cols; CvDTree dtree; printf("======DTREE=====\n"); CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0); dtree.train( &data, cvd_params); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() ); #if 0 /* boosted trees are only implemented for two classes */ printf("======BOOST=====\n"); CvBoost boost; boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 ); #endif printf("======RTREES=====\n"); CvRTrees rtrees; rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); CvERTrees ertrees; ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() ); 
printf("======GBTREES=====\n"); CvGBTrees gbtrees; CvGBTreesParams gbparams; gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression gbtrees.train( &data, gbparams); //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx); print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0); printf("======KNEAREST=====\n"); CvKNearest knearest; //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses, // const Mat& _sample_idx, bool _is_regression, // int _max_k, bool _update_base ) bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL; assert(is_classifier); int max_k = 10; knearest.train(values, response, train_sidx, is_regression, max_k, false); CvMat* new_response = cvCreateMat(response->rows, 1, values->type); //print_types(); //const CvMat* train_sidx = data.get_train_sample_idx(); knearest.find_nearest(values, max_k, new_response, 0, 0, 0); print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR), knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0); printf("======== RBF SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm1; CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm1.train(values, response, train_sidx, var_idx, params1); svm1.train_auto(values, response, var_idx, train_sidx, params1); svm_print_error(&svm1, values, response, response_idx, train_sidx); printf("======== Linear SVM =======\n"); CvMySVM svm2; CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 
1000, FLT_EPSILON)); //svm2.train(values, response, train_sidx, var_idx, params2); svm2.train_auto(values, response, var_idx, train_sidx, params2); svm_print_error(&svm2, values, response, response_idx, train_sidx); printf("======NEURONAL NETWORK=====\n"); int num_layers = 3; CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1)); cvmSetI(&layers, 0, 0, values->cols-1); cvmSetI(&layers, 0, 1, num_classes); cvmSetI(&layers, 0, 2, num_classes); CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0); CvANN_MLP_TrainParams ann_params; //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP; CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes); CvMat values2 = cvmat_remove_column(values, response_idx); ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000); //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000); ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx); #if 0 /* slow */ printf("======== Polygonal SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm3; CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY, /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm3.train(values, response, train_sidx, var_idx, params3); svm3.train_auto(values, response, var_idx, train_sidx, params3); svm_print_error(&svm3, values, response, response_idx, train_sidx); #endif return 0; }
int main(int argc, char** argv) { /* STEP 2. Opening the file */ //1. Declare a structure to keep the data CvMLData cvml; //2. Read the file cvml.read_csv("char_dataset.csv"); //cvml.read_csv("strokes_dataset_noresized.csv"); //3. Indicate which column is the response cvml.set_response_idx(0); /* STEP 3. Splitting the samples */ //1. Select 50% for the training (an integer value is also allowed here) CvTrainTestSplit cvtts(0.9f, true); //2. Assign the division to the data cvml.set_train_test_split(&cvtts); /* STEP 4. The training */ //1. Declare the classifier CvBoost boost; ifstream ifile("./trained_boost_char.xml"); if (ifile) { // The file exists, so we don't need to train boost.load("./trained_boost_char.xml", "boost"); } else { //2. Train it with 100 features printf("Training ... \n"); boost.train(&cvml, CvBoostParams(CvBoost::REAL, 2, 0, 1, false, 0), false); } cout<<"after train"<<endl; /* STEP 5. Calculating the testing and training error */ // 1. Declare a couple of vectors to save the predictions of each sample std::vector<float> train_responses, test_responses; // 2. Calculate the training error float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); // 3. 
Calculate the test error float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); //Try a char static const float arr[] = {0,1.659899,0.684169,0.412175,150.000000,81.000000,0.540000,0.358025,0.151203,0.000000,0.000000}; vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false ); float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true ); printf("\n The sample (360) is predicted as: %f (with number of votes = %f)\n", prediction,votes); //Try a NONchar static const float arr2[] = {0,1.250000,0.433013,0.346410,9.000000,8.000000,0.888889,0.833333,0.375000,0.000000,0.000000}; vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false ); votes = boost.predict( Mat(sample2), Mat(), Range::all(), false, true ); printf("\n The sample (367) is predicted as: %f (with number of votes = %f)\n", prediction,votes); /* STEP 6. Save your classifier */ // Save the trained classifier boost.save("./trained_boost_char.xml", "boost"); return EXIT_SUCCESS; }