int main() { const int train_sample_count = 300; //#define LEPIOTA #ifdef LEPIOTA const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data"; #else const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data"; #endif CvDTree dtree; CvBoost boost; CvRTrees rtrees; CvERTrees ertrees; CvMLData data; CvTrainTestSplit spl( train_sample_count ); data.read_csv( filename ); #ifdef LEPIOTA data.set_response_idx( 0 ); #else data.set_response_idx( 21 ); data.change_var_type( 21, CV_VAR_CATEGORICAL ); #endif data.set_train_test_split( &spl ); printf("======DTREE=====\n"); dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 )); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() ); #ifdef LEPIOTA printf("======BOOST=====\n"); boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 ); #endif printf("======RTREES=====\n"); rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() ); return 0; }
void CvRTDriver::createClassifier(const CvMat* data, const CvMat* responses, const CvMat* missing, float p_weight) { CvMat* var_type; int i, success = 0, fail = 0; float priors[] = { 1, p_weight }; var_type = cvCreateMat(data->cols + 1, 1, CV_8U); #ifdef CLASSIFY cvSet(var_type, cvScalarAll(CV_VAR_CATEGORICAL)); // all the variables are categorical #else cvSet(var_type, cvScalarAll(CV_VAR_NUMERICAL)); // all the variables are categorical #endif rtree = new CvRTrees; rtree->train(data, CV_ROW_SAMPLE, responses, 0, 0, 0, missing, CvRTParams(20, // max depth 10, // min sample count 0.01f, // regression accuracy: N/A here false, // compute surrogate split, as we have missing data 10, // max number of categories (use sub-optimal algorithm for larger numbers) 0, // the array of priors false, // calculate variable importance 0, // number of variables randomly selected at node and used to find the best split(s). 210, // max number of trees in the forest 0.01f, // forest accuracy CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria )); }
/**
 * Trains a two-class random forest on the given predictors.
 *
 * Prints the per-class sample counts obtained from get_stat(), then trains a
 * forest of at most 500 trees with class priors {0.25, 0.75}. All predictor
 * columns are declared numerical and the response is declared categorical.
 *
 * @param predictors  training samples, one per row (CV_ROW_SAMPLE)
 * @param labels      response for every sample
 * @return newly allocated forest; the caller owns it and must delete it.
 */
CvRTrees* train_rf(CvMat* predictors, CvMat* labels)
{
    int stat[2];
    get_stat(labels, stat);
    printf("%d negative samples, %d positive samples\n", stat[0], stat[1]);

    const int tree_count = 500;
    const float priors[] = {0.25f, 0.75f};

    CvRTParams rtparams = CvRTParams(5, 10, 0, false, 2, priors, true,
                                     (int)sqrt((float)predictors->cols),
                                     tree_count, 1e-6,
                                     CV_TERMCRIT_ITER + CV_TERMCRIT_EPS);

    // The mask holds one 8-bit element per variable. The previous code wrote
    // each entry through an int*, i.e. 4 bytes into a 1-byte slot, which
    // overran the buffer on the last entries and depended on byte order.
    CvMat* var_type = cvCreateMat(predictors->cols + 1, 1, CV_8UC1);
    cvSet(var_type, cvScalarAll(CV_VAR_NUMERICAL));
    cvSetReal1D(var_type, predictors->cols, CV_VAR_CATEGORICAL);

    CvRTrees* rtrees = new CvRTrees();
    rtrees->train(predictors, CV_ROW_SAMPLE, labels, 0, 0, var_type, 0, rtparams);

    // The mask is only needed during training; release it instead of leaking it.
    cvReleaseMat(&var_type);
    return rtrees;
}
/**
 * Initialises the wrapper for a data set with `no_of_features` features.
 *
 * No model file is loaded yet, and the forest parameters are preset.
 *
 * CvRTParams argument order, for reference:
 *   max_depth, min_sample_count, regression_accuracy, use_surrogates,
 *   max_categories, priors, calc_var_importance, nactive_vars,
 *   max_num_of_trees_in_the_forest, forest_accuracy, termcrit_type
 */
RandomTrees::RandomTrees(const unsigned int no_of_features)
{
    is_modelfile_loaded_ = false;
    number_of_features_ = no_of_features;

    // Number of variables tried at each split: sqrt(number of features).
    const int active_vars = ((int)sqrt(no_of_features));

    // Tree count (100), forest accuracy (0.01) and the iteration-only
    // termination criterion are fixed by the constructor call.
    CvRTParams forest_params(10, 10, 0, false, 10, 0, true,
                             active_vars, 100, 0.01f, CV_TERMCRIT_ITER);

    forest_params.max_depth           = INT_MAX;     // max levels in a tree
    forest_params.min_sample_count    = 10;          // do not split a node holding fewer samples
    forest_params.regression_accuracy = 0;
    forest_params.use_surrogates      = false;
    forest_params.max_categories      = 10;
    forest_params.priors              = 0;           // no priors
    forest_params.calc_var_importance = true;        // true for better evaluation
    forest_params.nactive_vars        = active_vars;

    tree_parameters_ = forest_params;
}
/** * @author JIA Pei * @version 2009-10-04 * @brief Training * @param data Input - input data * @param categories Input - column vector * @return classification time cost */ void CClassificationAlgs::Training(const Mat_<float>& data, const Mat_<int>& categories) { unsigned int NbOfSamples = data.rows; set<int> ClassSet; for(int i = 0; i < categories.rows; i++) { ClassSet.insert(categories(i, 0)); } this->m_iNbOfCategories = ClassSet.size(); switch(this->m_iClassificationMethod) { case CClassificationAlgs::DecisionTree: this->m_CVDtree.train( data, CV_ROW_SAMPLE, categories, Mat(), Mat(), Mat(), Mat(), CvDTreeParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, false, false, 0 ) ); break; case CClassificationAlgs::Boost: this->m_CVBoost.train( data, CV_ROW_SAMPLE, categories, Mat(), Mat(), Mat(), Mat(), CvBoostParams(CvBoost::DISCRETE, 50, 0.95, INT_MAX, false, 0), false ); break; case CClassificationAlgs::RandomForest: this->m_CVRTrees.train( data, CV_ROW_SAMPLE, categories, Mat(), Mat(), Mat(), Mat(), CvRTParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ) ); break; case CClassificationAlgs::ExtremeRandomForest: this->m_CVERTrees.train(data, CV_ROW_SAMPLE, categories, Mat(), Mat(), Mat(), Mat(), CvRTParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ) ); break; case CClassificationAlgs::SVM: this->m_CVSVM.train( data, categories, Mat(), Mat(), CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, 0, 1, 0, 1, 0, 0, NULL, cvTermCriteria(CV_TERMCRIT_ITER, 1000, 1E-6) ) ); break; } }
static int build_rtrees_classifier( char* data_filename, char* filename_to_save, char* filename_to_load ) { CvMat* data = 0; CvMat* responses = 0; CvMat* var_type = 0; CvMat* sample_idx = 0; int ok = read_num_class_data( data_filename, 16, &data, &responses ); int nsamples_all = 0, ntrain_samples = 0; int i = 0; double train_hr = 0, test_hr = 0; CvRTrees forest; CvMat* var_importance = 0; if( !ok ) { printf( "Could not read the database %s\n", data_filename ); return -1; } printf( "The database %s is loaded.\n", data_filename ); nsamples_all = data->rows; ntrain_samples = (int)(nsamples_all*0.8); // Create or load Random Trees classifier if( filename_to_load ) { // load classifier from the specified file forest.load( filename_to_load ); ntrain_samples = 0; if( forest.get_tree_count() == 0 ) { printf( "Could not read the classifier %s\n", filename_to_load ); return -1; } printf( "The classifier %s is loaded.\n", data_filename ); } else { // create classifier by using <data> and <responses> printf( "Training the classifier ...\n"); // 1. create type mask var_type = cvCreateMat( data->cols + 1, 1, CV_8U ); cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) ); cvSetReal1D( var_type, data->cols, CV_VAR_CATEGORICAL ); // 2. create sample_idx sample_idx = cvCreateMat( 1, nsamples_all, CV_8UC1 ); { CvMat mat; cvGetCols( sample_idx, &mat, 0, ntrain_samples ); cvSet( &mat, cvRealScalar(1) ); cvGetCols( sample_idx, &mat, ntrain_samples, nsamples_all ); cvSetZero( &mat ); } // 3. train classifier forest.train( data, CV_ROW_SAMPLE, responses, 0, sample_idx, var_type, 0, CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER)); printf( "\n"); } // compute prediction error on train and test data for( i = 0; i < nsamples_all; i++ ) { double r; CvMat sample; cvGetRow( data, &sample, i ); r = forest.predict( &sample ); r = fabs((double)r - responses->data.fl[i]) <= FLT_EPSILON ? 
1 : 0; if( i < ntrain_samples ) train_hr += r; else test_hr += r; } test_hr /= (double)(nsamples_all-ntrain_samples); train_hr /= (double)ntrain_samples; printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n", train_hr*100., test_hr*100. ); printf( "Number of trees: %d\n", forest.get_tree_count() ); // Print variable importance var_importance = (CvMat*)forest.get_var_importance(); if( var_importance ) { double rt_imp_sum = cvSum( var_importance ).val[0]; printf("var#\timportance (in %%):\n"); for( i = 0; i < var_importance->cols; i++ ) printf( "%-2d\t%-4.1f\n", i, 100.f*var_importance->data.fl[i]/rt_imp_sum); } //Print some proximitites printf( "Proximities between some samples corresponding to the letter 'T':\n" ); { CvMat sample1, sample2; const int pairs[][2] = {{0,103}, {0,106}, {106,103}, {-1,-1}}; for( i = 0; pairs[i][0] >= 0; i++ ) { cvGetRow( data, &sample1, pairs[i][0] ); cvGetRow( data, &sample2, pairs[i][1] ); printf( "proximity(%d,%d) = %.1f%%\n", pairs[i][0], pairs[i][1], forest.get_proximity( &sample1, &sample2 )*100. ); } } // Save Random Trees classifier to file if needed if( filename_to_save ) forest.save( filename_to_save ); cvReleaseMat( &sample_idx ); cvReleaseMat( &var_type ); cvReleaseMat( &data ); cvReleaseMat( &responses ); return 0; }
int main(int argc, char** argv) { // std::cout<<FLT_EPSILON<<std::endl; cv::Mat training_data, training_labels,testing_data, testing_labels; training_data = read_rgbd_data_cv(argv[1],NUMBER_OF_TRAINING_SAMPLES); training_labels = read_rgbd_data_cv(argv[2], NUMBER_OF_TRAINING_SAMPLES); testing_data = read_rgbd_data_cv(argv[3],NUMBER_OF_TESTING_SAMPLES); testing_labels = read_rgbd_data_cv(argv[4], NUMBER_OF_TESTING_SAMPLES); printf("dataset specs: %d samples with %d features\n", training_data.rows, training_data.cols); // define all the attributes as numerical // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) // that can be assigned on a per attribute basis cv::Mat var_type = cv::Mat(training_data.cols + 1, 1, CV_8U ); var_type.setTo(cv::Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical var_type.at<uchar>(training_data.cols, 0) = CV_VAR_CATEGORICAL; // the labels are categorical /********************************步骤1:定义初始化Random Trees的参数******************************/ float priors[] = {1,1,1,1,1}; // weights of each classification for classes CvRTParams params = CvRTParams(25, // max depth 50, // min sample count 0, // regression accuracy: N/A here false, // compute surrogate split, no missing data 15, // max number of categories (use sub-optimal algorithm for larger numbers) priors, // the array of priors false, // calculate variable importance 20, // number of variables randomly selected at node and used to find the best split(s). 
NUMBER_OF_TREES, // max number of trees in the forest 0.01f, // forrest accuracy CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria ); /****************************步骤2:训练 Random Decision Forest(RDF)分类器*********************/ // printf( "\nUsing training database: %s\n\n", argv[1]); CvRTrees* rtree = new CvRTrees; rtree->train(training_data, CV_ROW_SAMPLE, training_labels, cv::Mat(), cv::Mat(), var_type, cv::Mat(), params); // perform classifier testing and report results cv::Mat test_sample, train_sample; int correct_class = 0; int wrong_class = 0; int result; int label; int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0}; int false_negatives [NUMBER_OF_CLASSES] = {0,0,0,0,0}; CvDTreeNode* leaf_nodes [training_data.rows]; for (int tsample = 0; tsample < training_data.rows; tsample++) { train_sample = training_data.row(tsample); CvForestTree* tree = rtree->get_tree(1); CvDTreeNode* leaf_node = tree->predict(train_sample, cv::Mat()); leaf_nodes[tsample] = leaf_node; } // printf( "\nUsing testing database: %s\n\n", argv[2]); for (int tsample = 0; tsample < testing_data.rows; tsample++) { // extract a row from the testing matrix test_sample = testing_data.row(tsample); // train on the testing data: // test_sample = training_data.row(tsample); /********************************步骤3:预测*********************************************/ result = (int) rtree->predict(test_sample, cv::Mat()); label = (int) testing_labels.at<float>(tsample, 0); printf("Testing Sample %i -> class result (digit %d) - label (digit %d)\n", tsample, result, label); // get the leaf nodes of the first tree in the forest /*CvForestTree* tree = rtree->get_tree(0); std::list<const CvDTreeNode*> leaf_list; leaf_list = get_leaf_node( tree ); printf("Number of Leaf nodes: %ld\n", leaf_list.size());*/ // if the prediction and the (true) testing classification are the same // (N.B. openCV uses a floating point decision tree implementation!) 
if (fabs(result - label) >= FLT_EPSILON) { // if they differ more than floating point error => wrong class wrong_class++; false_positives[(int) result]++; false_negatives[(int) testing_labels.at<float>(tsample, 0)]++; } else { // otherwise correct correct_class++; } } printf( // "\nResults on the testing database: %s\n" "\tCorrect classification: %d (%g%%)\n" "\tWrong classifications: %d (%g%%)\n", // argv[2], correct_class, (double) correct_class*100/testing_data.rows, wrong_class, (double) wrong_class*100/testing_data.rows); for (int i = 0; i < NUMBER_OF_CLASSES; i++) { printf( "\tClass (digit %d) false postives %d (%g%%)\n\t false negatives %d (%g%%)\n", i, false_positives[i], (double) false_positives[i]*100/testing_data.rows, false_negatives[i], (double) false_negatives[i]*100/testing_data.rows); } // get all the leaf nodes in the forest for (int i = 0; i < NUMBER_OF_TREES; i ++) { CvForestTree* tree = rtree->get_tree(i); std::list<const CvDTreeNode*> leaf_list; leaf_list = get_leaf_node( tree ); } //get training_sample indices for leaf nodes std::list<leaf_samples> node_indices; for (int i = 0; i < training_data.rows; i++) { CvDTreeNode* leaf_node = leaf_nodes[i]; if (leaf_node != NULL) { leaf_samples leaf_sample; leaf_sample.leaf = leaf_node; leaf_sample.indices.push_front(i); printf("\nValue of leaf: %f\n", leaf_node->value); printf("Smaple indices for leaf:\n"); printf(" %d", i); for (int j=i+1; j < training_data.rows; j++) { if (leaf_node == leaf_nodes[j]) { leaf_sample.indices.push_front(j); printf(" %lu", j); leaf_nodes[j] = NULL; } } node_indices.push_front(leaf_sample); } } printf("\nSize of node_indices: %d\n", node_indices.size()); //get labels and features //get double pointers for features and labels const double* p = testing_data.ptr<double>(0); std::vector<double> vec(p, p + testing_data.cols); // all matrix memory free by destructors // all OK : main returns 0 // result = rtree->predict(testing_data.row(79), cv::Mat()); // float andi = result - 
testing_labels.at<float>(79, 0); // // std::cout<<training_labels.row(0).col(0)<<std::endl; // std::cout<<andi<<std::endl; return 0; }
void OpencvRFclassifier::learn(std::vector< std::vector<float> >& pfeatures, std::vector<int>& plabels){ if (_rf){ delete _rf; _trees.clear(); _tree_weights.clear(); } _rf = new CvRTrees; int rows = pfeatures.size(); int cols = pfeatures[0].size(); printf("Number of samples and dimensions: %d, %d\n",rows, cols); if ((rows<1)||(cols<1)){ return; } // clock_t start = clock(); std::time_t start, end; std::time(&start); CvMat *features = cvCreateMat(rows, cols, CV_32F); CvMat *labels = cvCreateMat(rows, 1 , CV_32F); float* datap = features->data.fl; float* labelp = labels->data.fl; int numzeros=0; for(int i=0; i < rows; i++){ labelp[i] = plabels[i]; numzeros += ( labelp[i] == -1? 1 : 0 ); for(int j=0; j < cols ; j++){ datap[i*cols+j] = (float)pfeatures[i][j]; } } printf("Number of merge: %d\n",numzeros); // 1. create type mask CvMat* var_type = cvCreateMat( features->cols + 1, 1, CV_8U ); cvSet( var_type, cvScalarAll(CV_VAR_NUMERICAL) ); cvSetReal1D( var_type, features->cols, CV_VAR_CATEGORICAL ); // define the parameters for training the random forest (trees) float priors[] = {1,1}; // weights of each classification for classes // (all equal as equal samples of each digit) CvRTParams params = CvRTParams( _max_depth, // max depth :the depth of the tree 10, // min sample count: minimum samples required at a leaf node for it to be split 0, // regression accuracy: N/A here false, // compute surrogate split, no missing data 15, // max number of categories (use sub-optimal algorithm for larger numbers) priors, // the array of prior for each class false, // calculate variable importance 5, // number of variables randomly selected at node and used to find the best split(s). _tree_count, // max number of trees in the forest 0.001f, // forest accuracy CV_TERMCRIT_ITER //| CV_TERMCRIT_EPS // termination cirteria ); // 3. 
train classifier _rf->train( features, CV_ROW_SAMPLE, labels, 0, 0, var_type, 0, params); //CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER)); float correct = 0; for(int i = 0; i < features->rows ; i++ ){ float r; CvMat sample; cvGetRow( features, &sample, i ); r = _rf->predict_prob( &sample ); r = (r>0.5)? 1 :-1; r = fabs((float)r - labels->data.fl[i]) <= FLT_EPSILON ? 1 : 0; correct += r; } std::time(&end); printf("Time required to learn RF: %.2f sec\n", (difftime(end,start))*1.0); // printf("Time required to learn RF: %.2f sec\n", ((float)clock() - start) / CLOCKS_PER_SEC); printf("with training set accuracy :%.3f\n", correct/features->rows*100.); _tree_count = _rf->get_tree_count(); for(int i = 0; i < _tree_count; i++){ CvForestTree* treep = _rf->get_tree(i); _trees.push_back(treep); } //int ntrees = _rf->get_tree_count(); _tree_weights.resize(_tree_count, 1.0/_tree_count); cvReleaseMat( &features ); cvReleaseMat( &labels ); cvReleaseMat( &var_type ); }
int main() { const int train_sample_count = 300; bool is_regression = false; const char* filename = "data/waveform.data"; int response_idx = 21; CvMLData data; CvTrainTestSplit spl( train_sample_count ); if(data.read_csv(filename) != 0) { printf("couldn't read %s\n", filename); exit(0); } data.set_response_idx(response_idx); data.change_var_type(response_idx, CV_VAR_CATEGORICAL); data.set_train_test_split( &spl ); const CvMat* values = data.get_values(); const CvMat* response = data.get_responses(); const CvMat* missing = data.get_missing(); const CvMat* var_types = data.get_var_types(); const CvMat* train_sidx = data.get_train_sample_idx(); const CvMat* var_idx = data.get_var_idx(); CvMat*response_map; CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL); int num_classes = response_map->cols; CvDTree dtree; printf("======DTREE=====\n"); CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0); dtree.train( &data, cvd_params); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() ); #if 0 /* boosted trees are only implemented for two classes */ printf("======BOOST=====\n"); CvBoost boost; boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 ); #endif printf("======RTREES=====\n"); CvRTrees rtrees; rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); CvERTrees ertrees; ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() ); 
printf("======GBTREES=====\n"); CvGBTrees gbtrees; CvGBTreesParams gbparams; gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression gbtrees.train( &data, gbparams); //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx); print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0); printf("======KNEAREST=====\n"); CvKNearest knearest; //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses, // const Mat& _sample_idx, bool _is_regression, // int _max_k, bool _update_base ) bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL; assert(is_classifier); int max_k = 10; knearest.train(values, response, train_sidx, is_regression, max_k, false); CvMat* new_response = cvCreateMat(response->rows, 1, values->type); //print_types(); //const CvMat* train_sidx = data.get_train_sample_idx(); knearest.find_nearest(values, max_k, new_response, 0, 0, 0); print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR), knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0); printf("======== RBF SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm1; CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm1.train(values, response, train_sidx, var_idx, params1); svm1.train_auto(values, response, var_idx, train_sidx, params1); svm_print_error(&svm1, values, response, response_idx, train_sidx); printf("======== Linear SVM =======\n"); CvMySVM svm2; CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 
1000, FLT_EPSILON)); //svm2.train(values, response, train_sidx, var_idx, params2); svm2.train_auto(values, response, var_idx, train_sidx, params2); svm_print_error(&svm2, values, response, response_idx, train_sidx); printf("======NEURONAL NETWORK=====\n"); int num_layers = 3; CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1)); cvmSetI(&layers, 0, 0, values->cols-1); cvmSetI(&layers, 0, 1, num_classes); cvmSetI(&layers, 0, 2, num_classes); CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0); CvANN_MLP_TrainParams ann_params; //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP; CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes); CvMat values2 = cvmat_remove_column(values, response_idx); ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000); //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000); ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx); #if 0 /* slow */ printf("======== Polygonal SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm3; CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY, /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm3.train(values, response, train_sidx, var_idx, params3); svm3.train_auto(values, response, var_idx, train_sidx, params3); svm_print_error(&svm3, values, response, response_idx, train_sidx); #endif return 0; }
int CV_ERTreesTest :: train( int test_case_idx ) { int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS, NACTIVE_VARS, MAX_TREES_NUM; float REG_ACCURACY = 0, OOB_EPS = 0.0; bool USE_SURROGATE, IS_PRUNED; const char* data_name = ((CvFileNode*)cvGetSeqElem( data_sets_names, test_case_idx ))->data.str.ptr; // read validation params CvFileStorage* fs = ts->get_file_storage(); CvFileNode* fnode = cvGetFileNodeByName( fs, 0, "validation" ), *fnode1 = 0; fnode = cvGetFileNodeByName( fs, fnode, name ); fnode = cvGetFileNodeByName( fs, fnode, data_name ); fnode = cvGetFileNodeByName( fs, fnode, "model_params" ); fnode1 = cvGetFileNodeByName( fs, fnode, "max_depth" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MAX_DEPTH can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MAX_DEPTH = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "min_sample_count" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MIN_SAMPLE_COUNT can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MIN_SAMPLE_COUNT = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "use_surrogate" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "USE_SURROGATE can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } USE_SURROGATE = (fnode1->data.i != 0); fnode1 = cvGetFileNodeByName( fs, fnode, "max_categories" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MAX_CATEGORIES can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MAX_CATEGORIES = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "cv_folds" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "CV_FOLDS can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } CV_FOLDS = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "is_pruned" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "IS_PRUNED can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } IS_PRUNED = (fnode1->data.i != 0); fnode1 = cvGetFileNodeByName( fs, fnode, "nactive_vars" ); 
if ( !fnode1 ) { ts->printf( CvTS::LOG, "NACTIVE_VARS can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } NACTIVE_VARS = fnode1->data.i; fnode1 = cvGetFileNodeByName( fs, fnode, "max_trees_num" ); if ( !fnode1 ) { ts->printf( CvTS::LOG, "MAX_TREES_NUM can not be read from config file" ); return CvTS::FAIL_INVALID_TEST_DATA; } MAX_TREES_NUM = fnode1->data.i; if ( !ertrees->train( &data, CvRTParams( MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY, USE_SURROGATE, MAX_CATEGORIES, 0, false, // (calc_var_importance == true) <=> RF processes variable importance NACTIVE_VARS, MAX_TREES_NUM, OOB_EPS, CV_TERMCRIT_ITER)) ) { ts->printf( CvTS::LOG, "in test case %d model training was failed", test_case_idx ); return CvTS::FAIL_INVALID_OUTPUT; } return CvTS::OK; }