int main() { const int train_sample_count = 300; //#define LEPIOTA #ifdef LEPIOTA const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data"; #else const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data"; #endif CvDTree dtree; CvBoost boost; CvRTrees rtrees; CvERTrees ertrees; CvMLData data; CvTrainTestSplit spl( train_sample_count ); data.read_csv( filename ); #ifdef LEPIOTA data.set_response_idx( 0 ); #else data.set_response_idx( 21 ); data.change_var_type( 21, CV_VAR_CATEGORICAL ); #endif data.set_train_test_split( &spl ); printf("======DTREE=====\n"); dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 )); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() ); #ifdef LEPIOTA printf("======BOOST=====\n"); boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 ); #endif printf("======RTREES=====\n"); rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() ); return 0; }
void find_decision_boundary_ERT() { img.copyTo( imgDst ); Mat trainSamples, trainClasses; prepare_train_data( trainSamples, trainClasses ); // learn classifier CvERTrees ertrees; Mat var_types( 1, trainSamples.cols + 1, CV_8UC1, Scalar(CV_VAR_ORDERED) ); var_types.at<uchar>( trainSamples.cols ) = CV_VAR_CATEGORICAL; CvRTParams params( 4, // max_depth, 2, // min_sample_count, 0.f, // regression_accuracy, false, // use_surrogates, 16, // max_categories, 0, // priors, false, // calc_var_importance, 1, // nactive_vars, 5, // max_num_of_trees_in_the_forest, 0, // forest_accuracy, CV_TERMCRIT_ITER // termcrit_type ); ertrees.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), var_types, Mat(), params ); Mat testSample(1, 2, CV_32FC1 ); for( int y = 0; y < img.rows; y += testStep ) { for( int x = 0; x < img.cols; x += testStep ) { testSample.at<float>(0) = (float)x; testSample.at<float>(1) = (float)y; int response = (int)ertrees.predict( testSample ); circle( imgDst, Point(x,y), 2, classColors[response], 1 ); } } }
int main() { const int train_sample_count = 300; bool is_regression = false; const char* filename = "data/waveform.data"; int response_idx = 21; CvMLData data; CvTrainTestSplit spl( train_sample_count ); if(data.read_csv(filename) != 0) { printf("couldn't read %s\n", filename); exit(0); } data.set_response_idx(response_idx); data.change_var_type(response_idx, CV_VAR_CATEGORICAL); data.set_train_test_split( &spl ); const CvMat* values = data.get_values(); const CvMat* response = data.get_responses(); const CvMat* missing = data.get_missing(); const CvMat* var_types = data.get_var_types(); const CvMat* train_sidx = data.get_train_sample_idx(); const CvMat* var_idx = data.get_var_idx(); CvMat*response_map; CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL); int num_classes = response_map->cols; CvDTree dtree; printf("======DTREE=====\n"); CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0); dtree.train( &data, cvd_params); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() ); #if 0 /* boosted trees are only implemented for two classes */ printf("======BOOST=====\n"); CvBoost boost; boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 ); #endif printf("======RTREES=====\n"); CvRTrees rtrees; rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); CvERTrees ertrees; ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() ); printf("======GBTREES=====\n"); CvGBTrees gbtrees; CvGBTreesParams gbparams; gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression gbtrees.train( &data, gbparams); //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx); print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0); printf("======KNEAREST=====\n"); CvKNearest knearest; //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses, // const Mat& _sample_idx, bool _is_regression, // int _max_k, bool _update_base ) bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL; assert(is_classifier); int max_k = 10; knearest.train(values, response, train_sidx, is_regression, max_k, false); CvMat* new_response = cvCreateMat(response->rows, 1, values->type); //print_types(); //const CvMat* train_sidx = data.get_train_sample_idx(); knearest.find_nearest(values, max_k, new_response, 0, 0, 0); print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR), knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0); printf("======== RBF SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm1; CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm1.train(values, response, train_sidx, var_idx, params1); svm1.train_auto(values, response, var_idx, train_sidx, params1); svm_print_error(&svm1, values, response, response_idx, train_sidx); printf("======== Linear SVM =======\n"); CvMySVM svm2; CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm2.train(values, response, train_sidx, var_idx, params2); svm2.train_auto(values, response, var_idx, train_sidx, params2); svm_print_error(&svm2, values, response, response_idx, train_sidx); printf("======NEURONAL NETWORK=====\n"); int num_layers = 3; CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1)); cvmSetI(&layers, 0, 0, values->cols-1); cvmSetI(&layers, 0, 1, num_classes); cvmSetI(&layers, 0, 2, num_classes); CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0); CvANN_MLP_TrainParams ann_params; //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP; CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes); CvMat values2 = cvmat_remove_column(values, response_idx); ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000); //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000); ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx); #if 0 /* slow */ printf("======== Polygonal SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm3; CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY, /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm3.train(values, response, train_sidx, var_idx, params3); svm3.train_auto(values, response, var_idx, train_sidx, params3); svm_print_error(&svm3, values, response, response_idx, train_sidx); #endif return 0; }
int CV_ERTreesTest :: train( int test_case_idx ) { int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS, NACTIVE_VARS, MAX_TREES_NUM; float REG_ACCURACY = 0, OOB_EPS = 0.0; bool USE_SURROGATE, IS_PRUNED; const char* data_name = ((CvFileNode*)cvGetSeqElem( data_sets_names, test_case_idx ))->data.str.ptr; // read validation params CvFileStorage* fs = ts->get_file_storage(); CvFileNode* fnode = cvGetFileNodeByName( fs, 0, "validation" ), *fnode1 = 0; fnode = cvGetFileNodeByName( fs, fnode, name ); fnode = cvGetFileNodeByName( fs, fnode, data_name ); fnode = cvGetFileNodeByName( fs, fnode, "model_params" ); fnode1 = cvGetFileNodeByName( fs, fnode, "max_depth" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MAX_DEPTH can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MAX_DEPTH = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "min_sample_count" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MIN_SAMPLE_COUNT can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MIN_SAMPLE_COUNT = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "use_surrogate" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "USE_SURROGATE can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } USE_SURROGATE = (fnode1->data.i != 0); fnode1 = cvGetFileNodeByName( fs, fnode, "max_categories" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MAX_CATEGORIES can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MAX_CATEGORIES = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "cv_folds" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "CV_FOLDS can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } CV_FOLDS = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "is_pruned" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "IS_PRUNED can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } IS_PRUNED = (fnode1->data.i != 0); fnode1 = cvGetFileNodeByName( fs, fnode, "nactive_vars" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "NACTIVE_VARS can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } NACTIVE_VARS = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "max_trees_num" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MAX_TREES_NUM can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MAX_TREES_NUM = fnode1->data.i; if ( !ertrees->train( &data, CvRTParams( MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY, USE_SURROGATE, MAX_CATEGORIES, 0, false, // (calc_var_importance == true) <=> RF processes variable importance NACTIVE_VARS, MAX_TREES_NUM, OOB_EPS, CV_TERMCRIT_ITER)) ) { ts->printf( CvTS::LOG, "in test case %d model training was failed", test_case_idx ); return CvTS::FAIL_INVALID_OUTPUT; } return CvTS::OK; }