void Model::Train_rtrees( const SampleSet& samples ) { CvRTrees* model = (CvRTrees*)m_pModel; CvRTParams* para = (CvRTParams*)m_trainPara; model->train(samples.Samples(), CV_ROW_SAMPLE, samples.Labels(), cv::Mat(), cv::Mat(), cv::Mat(), cv::Mat(), *para); }
// Sample driver: loads a CSV dataset (agaricus-lepiota or waveform, chosen at
// compile time by the LEPIOTA define), splits off train_sample_count training
// rows, then trains a decision tree, (optionally) boosting, random trees and
// extremely-randomized trees, printing train/test error and variable importance.
int main()
{
    const int train_sample_count = 300;
//#define LEPIOTA
#ifdef LEPIOTA
    const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data";
#else
    const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data";
#endif

    CvDTree dtree;
    CvBoost boost;
    CvRTrees rtrees;
    CvERTrees ertrees;

    CvMLData data;
    CvTrainTestSplit spl( train_sample_count );

    data.read_csv( filename );
#ifdef LEPIOTA
    data.set_response_idx( 0 );     // lepiota data: class label is column 0
#else
    data.set_response_idx( 21 );    // waveform data: class label is column 21
    data.change_var_type( 21, CV_VAR_CATEGORICAL );
#endif
    data.set_train_test_split( &spl );

    printf("======DTREE=====\n");
    dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() );

#ifdef LEPIOTA
    // boosting is only run on the two-class lepiota data
    printf("======BOOST=====\n");
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 );
#endif

    printf("======RTREES=====\n");
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() );

    return 0;
}
void test() { CvMat* tmp_mat = cvCreateMat(1, feat_count_, CV_32FC1); int pos_right = 0; int pos_total = 0; for (vector< vector<float> >::iterator i = pos_data_.begin(); i != pos_data_.end(); i++) { for (int k = 0; k < feat_count_; k++) tmp_mat->data.fl[k] = (float)((*i)[k]); if (forest.predict(tmp_mat) > 0) pos_right++; pos_total++; } int neg_right = 0; int neg_total = 0; for (vector< vector<float> >::iterator i = neg_data_.begin(); i != neg_data_.end(); i++) { for (int k = 0; k < feat_count_; k++) tmp_mat->data.fl[k] = (float)((*i)[k]); if (forest.predict(tmp_mat) < 0) neg_right++; neg_total++; } int test_right = 0; int test_total = 0; for (vector< vector<float> >::iterator i = test_data_.begin(); i != test_data_.end(); i++) { for (int k = 0; k < feat_count_; k++) tmp_mat->data.fl[k] = (float)((*i)[k]); if (forest.predict(tmp_mat) > 0) test_right++; test_total++; } printf(" Pos train set: %d/%d %g\n", pos_right, pos_total, (float)(pos_right) / pos_total); printf(" Neg train set: %d/%d %g\n", neg_right, neg_total, (float)(neg_right) / neg_total); printf(" Test set: %d/%d %g\n", test_right, test_total, (float)(test_right) / test_total); cvReleaseMat(&tmp_mat); }
void Model::Predict_rtrees( const SampleSet& samples, SampleSet& outError ) { int true_resp = 0; CvRTrees *model = (CvRTrees*)m_pModel; for (int i = 0; i < samples.N(); i++) { float ret = model->predict(samples.GetSampleAt(i), cv::Mat()); if (ret != samples.GetLabelAt(i)) { outError.Add(samples.GetSampleAt(i), samples.GetLabelAt(i)); } else { true_resp++; } } printf("%d %d",samples.N(), true_resp); }
// Trains a 500-tree random forest on the row-major predictor matrix with the
// given (binary, prior-weighted) labels. Returns a heap-allocated forest;
// ownership passes to the caller.
CvRTrees* train_rf(CvMat* predictors, CvMat* labels)
{
    int stat[2];
    get_stat(labels, stat);
    printf("%d negative samples, %d positive samples\n", stat[0], stat[1]);

    const int tree_count = 500;
    const float priors[] = {0.25f,0.75f};   // down-weight the (majority) negative class
    CvRTrees* rtrees = new CvRTrees();
    CvRTParams rtparams = CvRTParams(5, 10, 0, false, 2, priors, true,
                                     (int)sqrt((float)predictors->cols),
                                     tree_count, 1e-6,
                                     CV_TERMCRIT_ITER + CV_TERMCRIT_EPS);

    // Variable-type mask: one byte per predictor, plus one for the response.
    // FIX: the matrix elements are CV_8UC1 (1 byte each); the previous code
    // stored a 4-byte int through *(int*), overrunning each element.
    CvMat* var_type = cvCreateMat(predictors->cols + 1, 1, CV_8UC1);
    for(int i = 0; i < predictors->cols; i++)
    {
        var_type->data.ptr[i*var_type->step] = (uchar)CV_VAR_NUMERICAL;
    }
    var_type->data.ptr[predictors->cols*var_type->step] = (uchar)CV_VAR_CATEGORICAL;

    rtrees->train(predictors, CV_ROW_SAMPLE, labels, 0, 0, var_type, 0, rtparams);

    // train() copies the type mask internally, so release it here (was leaked).
    cvReleaseMat(&var_type);
    return rtrees;
}
/* Examines the values at each leaf node in order to see what the distribution of data we put in is doing */ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { ASSERT_NUM_RHS_ARGS_EQUALS(1); const mxArray* forest_ptr = prhs[0]; ASSERT_IS_POINTER(forest_ptr); CvRTrees *forest = (CvRTrees *) unpack_pointer(forest_ptr); // We are going to return a cell array with one cell per tree, so need this number int num_trees = forest->get_tree_count(); mexPrintf("Loaded forest of %d trees, retrieving leave node values.\n", num_trees); mxArray *output_cell_array = mxCreateCellMatrix(1, num_trees); ASSERT_NON_NULL(output_cell_array); for (unsigned int t = 0; t < num_trees; t++) { mxArray* tree_struct = mxCreateStructArray(num_dims, dims, tree_num_fields, tree_field_names); ASSERT_NON_NULL(tree_struct); mxSetCell(output_cell_array, t, make_matlab_tree_struct(forest->get_tree(t))); } plhs[0] = output_cell_array; }
void train() { int sample_size = pos_data_.size() + neg_data_.size(); feat_count_ = pos_data_[0].size(); CvMat* cv_data = cvCreateMat(sample_size, feat_count_, CV_32FC1); CvMat* cv_resp = cvCreateMat(sample_size, 1, CV_32S); // Put positive data in opencv format. int j = 0; for (vector< vector<float> >::iterator i = pos_data_.begin(); i != pos_data_.end(); i++) { float* data_row = (float*)(cv_data->data.ptr + cv_data->step * j); for (int k = 0; k < feat_count_; k++) data_row[k] = (*i)[k]; cv_resp->data.i[j] = 1; j++; } // Put negative data in opencv format. for (vector< vector<float> >::iterator i = neg_data_.begin(); i != neg_data_.end(); i++) { float* data_row = (float*)(cv_data->data.ptr + cv_data->step * j); for (int k = 0; k < feat_count_; k++) data_row[k] = (*i)[k]; cv_resp->data.i[j] = -1; j++; } CvMat* var_type = cvCreateMat(1, feat_count_ + 1, CV_8U); cvSet(var_type, cvScalarAll(CV_VAR_ORDERED)); cvSetReal1D(var_type, feat_count_, CV_VAR_CATEGORICAL); float priors[] = {1.0, 1.0}; CvRTParams fparam(8, 20, 0, false, 10, priors, false, 5, 50, 0.001f, CV_TERMCRIT_ITER); fparam.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 0.1); forest.train(cv_data, CV_ROW_SAMPLE, cv_resp, 0, 0, var_type, 0, fparam); cvReleaseMat(&cv_data); cvReleaseMat(&cv_resp); cvReleaseMat(&var_type); }
// Runs a single train/test validation round: carves a 1/8 test slice out of
// both the negative and positive portions of the data (via a 0/1 sample mask),
// trains the forest on the remainder, then evaluates and prints hit/error rates.
// Results are accumulated into 'result'.
void normalValidation( DataSet& data, TrainResult& result)
{
    //these vars not needed - use empty Mat
    Mat varIdx, missingDataMask;
    Mat sampleIdx;

    result.train_hr = 0; result.test_hr = 0;
    result.fpRate = 0; result.fnRate = 0;
    // printf( "numSamples %d", data.numSamples);
    //CvBoostTree boost;

    //define test and trainingsset
    float partTrain = 1.0/8.0;
    // mask starts as all-ones (every sample used for training) ...
    sampleIdx = Mat(1,data.numSamples,CV_8U,1.0);
    // ... then the 6th eighth of the negatives and of the positives is zeroed
    // out to act as the test set. NOTE(review): this assumes the data is laid
    // out negatives-first then positives — confirm against DataSet's builder.
    int negIdx = (int)floor(partTrain*data.numNeg);
    sampleIdx.colRange(negIdx*5, negIdx*6) = 0.0;
    int posIdx = (int)floor( partTrain*data.numPos );
    sampleIdx.colRange( data.numNeg+posIdx*5, data.numNeg + posIdx*6) = 0.0;
    //int numT = (cv::sum( sampleIdx ))[0];
    //printf("sample Idx sum (trainsamples): %d\n",numT);

    int numTestSamples = negIdx + posIdx;
    printf("numSamples: %d -- numTrainSamples: %d -- numTestSamples: %d\n",data.numSamples, data.numSamples-numTestSamples, numTestSamples );

    //training
    forest.train(data.data, CV_ROW_SAMPLE, data.responses, varIdx, sampleIdx, data.varType, missingDataMask, forestParams);
    //booster.train(data.data, CV_ROW_SAMPLE, data.responses, varIdx, sampleIdx, data.varType, missingDataMask, boostParams);

    //evaluation
    evaluation(forest, data, sampleIdx, result);

    // normalize variable importance so the entries sum to 1
    double sum = (cv::sum(result.var_importance))[0];
    result.var_importance /= sum;

    printf( "____\nRecognition rate: train = %.2f%%, test = %.2f%% -- overall FN = %.2f%%, FP = %.2f%%\n", result.train_hr*100., result.test_hr*100. ,result.fnRate*100. ,result.fpRate*100.);
}
void find_decision_boundary_RF() { img.copyTo( imgDst ); Mat trainSamples, trainClasses; prepare_train_data( trainSamples, trainClasses ); // learn classifier CvRTrees rtrees; CvRTParams params( 4, // max_depth, 2, // min_sample_count, 0.f, // regression_accuracy, false, // use_surrogates, 16, // max_categories, 0, // priors, false, // calc_var_importance, 1, // nactive_vars, 5, // max_num_of_trees_in_the_forest, 0, // forest_accuracy, CV_TERMCRIT_ITER // termcrit_type ); rtrees.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), Mat(), Mat(), params ); Mat testSample(1, 2, CV_32FC1 ); for( int y = 0; y < img.rows; y += testStep ) { for( int x = 0; x < img.cols; x += testStep ) { testSample.at<float>(0) = (float)x; testSample.at<float>(1) = (float)y; int response = (int)rtrees.predict( testSample ); circle( imgDst, Point(x,y), 2, classColors[response], 1 ); } } }
static int build_rtrees_classifier( char* data_filename, char* filename_to_save, char* filename_to_load ) { CvMat* data = 0; CvMat* responses = 0; CvMat* var_type = 0; CvMat* sample_idx = 0; int ok = read_num_class_data( data_filename, 16, &data, &responses ); int nsamples_all = 0, ntrain_samples = 0; int i = 0; double train_hr = 0, test_hr = 0; CvRTrees forest; CvMat* var_importance = 0; if( !ok ) { printf( "Could not read the database %s\n", data_filename ); return -1; } printf( "The database %s is loaded.\n", data_filename ); nsamples_all = data->rows; ntrain_samples = (int)(nsamples_all*0.8); // Create or load Random Trees classifier if( filename_to_load ) { // load classifier from the specified file forest.load( filename_to_load ); ntrain_samples = 0; if( forest.get_tree_count() == 0 ) { printf( "Could not read the classifier %s\n", filename_to_load ); return -1; } printf( "The classifier %s is loaded.\n", data_filename ); } else { // create classifier by using <data> and <responses> printf( "Training the classifier ...\n"); // 1. create type mask var_type = cvCreateMat( data->cols + 1, 1, CV_8U ); cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) ); cvSetReal1D( var_type, data->cols, CV_VAR_CATEGORICAL ); // 2. create sample_idx sample_idx = cvCreateMat( 1, nsamples_all, CV_8UC1 ); { CvMat mat; cvGetCols( sample_idx, &mat, 0, ntrain_samples ); cvSet( &mat, cvRealScalar(1) ); cvGetCols( sample_idx, &mat, ntrain_samples, nsamples_all ); cvSetZero( &mat ); } // 3. train classifier forest.train( data, CV_ROW_SAMPLE, responses, 0, sample_idx, var_type, 0, CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER)); printf( "\n"); } // compute prediction error on train and test data for( i = 0; i < nsamples_all; i++ ) { double r; CvMat sample; cvGetRow( data, &sample, i ); r = forest.predict( &sample ); r = fabs((double)r - responses->data.fl[i]) <= FLT_EPSILON ? 
1 : 0; if( i < ntrain_samples ) train_hr += r; else test_hr += r; } test_hr /= (double)(nsamples_all-ntrain_samples); train_hr /= (double)ntrain_samples; printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n", train_hr*100., test_hr*100. ); printf( "Number of trees: %d\n", forest.get_tree_count() ); // Print variable importance var_importance = (CvMat*)forest.get_var_importance(); if( var_importance ) { double rt_imp_sum = cvSum( var_importance ).val[0]; printf("var#\timportance (in %%):\n"); for( i = 0; i < var_importance->cols; i++ ) printf( "%-2d\t%-4.1f\n", i, 100.f*var_importance->data.fl[i]/rt_imp_sum); } //Print some proximitites printf( "Proximities between some samples corresponding to the letter 'T':\n" ); { CvMat sample1, sample2; const int pairs[][2] = {{0,103}, {0,106}, {106,103}, {-1,-1}}; for( i = 0; pairs[i][0] >= 0; i++ ) { cvGetRow( data, &sample1, pairs[i][0] ); cvGetRow( data, &sample2, pairs[i][1] ); printf( "proximity(%d,%d) = %.1f%%\n", pairs[i][0], pairs[i][1], forest.get_proximity( &sample1, &sample2 )*100. ); } } // Save Random Trees classifier to file if needed if( filename_to_save ) forest.save( filename_to_save ); cvReleaseMat( &sample_idx ); cvReleaseMat( &var_type ); cvReleaseMat( &data ); cvReleaseMat( &responses ); return 0; }
// Benchmark driver: loads the waveform CSV dataset, splits off 300 training
// samples, then trains and prints train/test error for a series of ML models:
// decision tree, (disabled) boosting, random trees, extremely-randomized trees,
// gradient-boosted trees, k-nearest neighbours, RBF and linear SVMs, a neural
// network, and a (disabled, slow) polynomial SVM.
int main()
{
    const int train_sample_count = 300;
    bool is_regression = false;

    const char* filename = "data/waveform.data";
    int response_idx = 21;          // class label lives in column 21

    CvMLData data;
    CvTrainTestSplit spl( train_sample_count );

    if(data.read_csv(filename) != 0)
    {
        printf("couldn't read %s\n", filename);
        exit(0);
    }

    data.set_response_idx(response_idx);
    data.change_var_type(response_idx, CV_VAR_CATEGORICAL);
    data.set_train_test_split( &spl );

    // raw views into the loaded dataset (owned by 'data')
    const CvMat* values = data.get_values();
    const CvMat* response = data.get_responses();
    const CvMat* missing = data.get_missing();
    const CvMat* var_types = data.get_var_types();
    const CvMat* train_sidx = data.get_train_sample_idx();
    const CvMat* var_idx = data.get_var_idx();

    // map categorical responses to a dense 0..k-1 range; response_map gives
    // the number of distinct classes
    CvMat*response_map;
    CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL);
    int num_classes = response_map->cols;

    CvDTree dtree;
    printf("======DTREE=====\n");
    CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0);
    dtree.train( &data, cvd_params);
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );

#if 0
    /* boosted trees are only implemented for two classes */
    printf("======BOOST=====\n");
    CvBoost boost;
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 );
#endif

    printf("======RTREES=====\n");
    CvRTrees rtrees;
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    CvERTrees ertrees;
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );

    printf("======GBTREES=====\n");
    CvGBTrees gbtrees;
    CvGBTreesParams gbparams;
    gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression
    gbtrees.train( &data, gbparams);
    //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx);
    print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0);

    printf("======KNEAREST=====\n");
    CvKNearest knearest;
    //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
    //                        const Mat& _sample_idx, bool _is_regression,
    //                        int _max_k, bool _update_base )
    bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
    assert(is_classifier);
    int max_k = 10;
    knearest.train(values, response, train_sidx, is_regression, max_k, false);

    CvMat* new_response = cvCreateMat(response->rows, 1, values->type);
    //print_types();
    //const CvMat* train_sidx = data.get_train_sample_idx();
    knearest.find_nearest(values, max_k, new_response, 0, 0, 0);

    print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR),
                 knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0);

    printf("======== RBF SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm1;
    CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
                                      /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                      /*nu*/0, /*p*/0, /*class_weights*/0,
                                      cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm1.train(values, response, train_sidx, var_idx, params1);
    svm1.train_auto(values, response, var_idx, train_sidx, params1);
    svm_print_error(&svm1, values, response, response_idx, train_sidx);

    printf("======== Linear SVM =======\n");
    CvMySVM svm2;
    CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR,
                                      /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                      /*nu*/0, /*p*/0, /*class_weights*/0,
                                      cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm2.train(values, response, train_sidx, var_idx, params2);
    svm2.train_auto(values, response, var_idx, train_sidx, params2);
    svm_print_error(&svm2, values, response, response_idx, train_sidx);

    printf("======NEURONAL NETWORK=====\n");
    // 3 layers: inputs, one hidden layer of num_classes units, num_classes outputs
    int num_layers = 3;
    // NOTE(review): the buffer is sized with sizeof(double) but holds CV_32SC1
    // (int) elements, and is never freed — confirm/clean up.
    CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1));
    cvmSetI(&layers, 0, 0, values->cols-1);
    cvmSetI(&layers, 0, 1, num_classes);
    cvmSetI(&layers, 0, 2, num_classes);
    CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0);
    CvANN_MLP_TrainParams ann_params;
    //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP;

    // the network needs one boolean output column per class, and the response
    // column removed from the inputs
    CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes);
    CvMat values2 = cvmat_remove_column(values, response_idx);
    ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000);
    //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000);
    ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx);

#if 0 /* slow */
    printf("======== Polygonal SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm3;
    CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY,
                                      /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1,
                                      /*nu*/0, /*p*/0, /*class_weights*/0,
                                      cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm3.train(values, response, train_sidx, var_idx, params3);
    svm3.train_auto(values, response, var_idx, train_sidx, params3);
    svm_print_error(&svm3, values, response, response_idx, train_sidx);
#endif

    return 0;
}
int RandomTrees::train(const char* samples_filename, const char* model_filename, const double ratio, double &train_error, double &test_error) { CvMat* data = 0; CvMat* responses = 0; CvMat* var_type = 0; CvMat* sample_idx = 0; this->tree_parameters_.nactive_vars = (int)sqrt(this->number_of_features_); int ok = read_num_class_data( samples_filename, this->number_of_features_, &data, &responses ); int nsamples_all = 0, ntrain_samples = 0; int i = 0; double train_hr = 0, test_hr = 0; CvRTrees forest; CvMat* var_importance = 0; if( !ok ) { cout << "Could not read the sample in" << samples_filename << endl;; return -1; } cout << "The sample file " << samples_filename << " is loaded." << endl; nsamples_all = data->rows; ntrain_samples = (int)(nsamples_all * ratio); // create classifier by using <data> and <responses> cout << "Training the classifier ..." << endl; // 1. create type mask var_type = cvCreateMat( data->cols + 1, 1, CV_8U ); cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) ); cvSetReal1D( var_type, data->cols, CV_VAR_CATEGORICAL ); // 2. create sample_idx sample_idx = cvCreateMat( 1, nsamples_all, CV_8UC1 ); { CvMat mat; cvGetCols( sample_idx, &mat, 0, ntrain_samples ); cvSet( &mat, cvRealScalar(1) ); cvGetCols( sample_idx, &mat, ntrain_samples, nsamples_all ); cvSetZero( &mat ); } // 3. train classifier forest.train( data, CV_ROW_SAMPLE, responses, 0, sample_idx, var_type, 0, this->tree_parameters_); cout << endl; // compute prediction error on train and test data for( i = 0; i < nsamples_all; i++ ) { double r; CvMat sample; cvGetRow( data, &sample, i ); r = forest.predict( &sample ); r = fabs((double)r - responses->data.fl[i]) <= FLT_EPSILON ? 
1 : 0; if( i < ntrain_samples ) train_hr += r; else test_hr += r; } test_hr /= (double)(nsamples_all-ntrain_samples); train_hr /= (double)ntrain_samples; train_error = 1 - train_hr; test_error = 1 - test_hr; cout << "Recognition rate: train = " << train_hr*100 << ", test = " << test_hr*100 << endl; cout << "Number of trees: " << forest.get_tree_count() << endl; // Print variable importance var_importance = (CvMat*)forest.get_var_importance(); if( var_importance ) { double rt_imp_sum = cvSum( var_importance ).val[0]; printf("var#\timportance (in %%):\n"); for( i = 0; i < var_importance->cols; i++ ) printf( "%-2d\t%-4.1f\n", i,100.f*var_importance->data.fl[i]/rt_imp_sum); } // Save Random Trees classifier to file if needed if( model_filename ) forest.save( model_filename ); //cvReleaseMat( &var_importance ); //causes a segmentation fault cvReleaseMat( &sample_idx ); cvReleaseMat( &var_type ); cvReleaseMat( &data ); cvReleaseMat( &responses ); return 0; }
int main(int argc, char** argv) { // std::cout<<FLT_EPSILON<<std::endl; cv::Mat training_data, training_labels,testing_data, testing_labels; training_data = read_rgbd_data_cv(argv[1],NUMBER_OF_TRAINING_SAMPLES); training_labels = read_rgbd_data_cv(argv[2], NUMBER_OF_TRAINING_SAMPLES); testing_data = read_rgbd_data_cv(argv[3],NUMBER_OF_TESTING_SAMPLES); testing_labels = read_rgbd_data_cv(argv[4], NUMBER_OF_TESTING_SAMPLES); printf("dataset specs: %d samples with %d features\n", training_data.rows, training_data.cols); // define all the attributes as numerical // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) // that can be assigned on a per attribute basis cv::Mat var_type = cv::Mat(training_data.cols + 1, 1, CV_8U ); var_type.setTo(cv::Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical var_type.at<uchar>(training_data.cols, 0) = CV_VAR_CATEGORICAL; // the labels are categorical /********************************步骤1:定义初始化Random Trees的参数******************************/ float priors[] = {1,1,1,1,1}; // weights of each classification for classes CvRTParams params = CvRTParams(25, // max depth 50, // min sample count 0, // regression accuracy: N/A here false, // compute surrogate split, no missing data 15, // max number of categories (use sub-optimal algorithm for larger numbers) priors, // the array of priors false, // calculate variable importance 20, // number of variables randomly selected at node and used to find the best split(s). 
NUMBER_OF_TREES, // max number of trees in the forest 0.01f, // forrest accuracy CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria ); /****************************步骤2:训练 Random Decision Forest(RDF)分类器*********************/ // printf( "\nUsing training database: %s\n\n", argv[1]); CvRTrees* rtree = new CvRTrees; rtree->train(training_data, CV_ROW_SAMPLE, training_labels, cv::Mat(), cv::Mat(), var_type, cv::Mat(), params); // perform classifier testing and report results cv::Mat test_sample, train_sample; int correct_class = 0; int wrong_class = 0; int result; int label; int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0}; int false_negatives [NUMBER_OF_CLASSES] = {0,0,0,0,0}; CvDTreeNode* leaf_nodes [training_data.rows]; for (int tsample = 0; tsample < training_data.rows; tsample++) { train_sample = training_data.row(tsample); CvForestTree* tree = rtree->get_tree(1); CvDTreeNode* leaf_node = tree->predict(train_sample, cv::Mat()); leaf_nodes[tsample] = leaf_node; } // printf( "\nUsing testing database: %s\n\n", argv[2]); for (int tsample = 0; tsample < testing_data.rows; tsample++) { // extract a row from the testing matrix test_sample = testing_data.row(tsample); // train on the testing data: // test_sample = training_data.row(tsample); /********************************步骤3:预测*********************************************/ result = (int) rtree->predict(test_sample, cv::Mat()); label = (int) testing_labels.at<float>(tsample, 0); printf("Testing Sample %i -> class result (digit %d) - label (digit %d)\n", tsample, result, label); // get the leaf nodes of the first tree in the forest /*CvForestTree* tree = rtree->get_tree(0); std::list<const CvDTreeNode*> leaf_list; leaf_list = get_leaf_node( tree ); printf("Number of Leaf nodes: %ld\n", leaf_list.size());*/ // if the prediction and the (true) testing classification are the same // (N.B. openCV uses a floating point decision tree implementation!) 
if (fabs(result - label) >= FLT_EPSILON) { // if they differ more than floating point error => wrong class wrong_class++; false_positives[(int) result]++; false_negatives[(int) testing_labels.at<float>(tsample, 0)]++; } else { // otherwise correct correct_class++; } } printf( // "\nResults on the testing database: %s\n" "\tCorrect classification: %d (%g%%)\n" "\tWrong classifications: %d (%g%%)\n", // argv[2], correct_class, (double) correct_class*100/testing_data.rows, wrong_class, (double) wrong_class*100/testing_data.rows); for (int i = 0; i < NUMBER_OF_CLASSES; i++) { printf( "\tClass (digit %d) false postives %d (%g%%)\n\t false negatives %d (%g%%)\n", i, false_positives[i], (double) false_positives[i]*100/testing_data.rows, false_negatives[i], (double) false_negatives[i]*100/testing_data.rows); } // get all the leaf nodes in the forest for (int i = 0; i < NUMBER_OF_TREES; i ++) { CvForestTree* tree = rtree->get_tree(i); std::list<const CvDTreeNode*> leaf_list; leaf_list = get_leaf_node( tree ); } //get training_sample indices for leaf nodes std::list<leaf_samples> node_indices; for (int i = 0; i < training_data.rows; i++) { CvDTreeNode* leaf_node = leaf_nodes[i]; if (leaf_node != NULL) { leaf_samples leaf_sample; leaf_sample.leaf = leaf_node; leaf_sample.indices.push_front(i); printf("\nValue of leaf: %f\n", leaf_node->value); printf("Smaple indices for leaf:\n"); printf(" %d", i); for (int j=i+1; j < training_data.rows; j++) { if (leaf_node == leaf_nodes[j]) { leaf_sample.indices.push_front(j); printf(" %lu", j); leaf_nodes[j] = NULL; } } node_indices.push_front(leaf_sample); } } printf("\nSize of node_indices: %d\n", node_indices.size()); //get labels and features //get double pointers for features and labels const double* p = testing_data.ptr<double>(0); std::vector<double> vec(p, p + testing_data.cols); // all matrix memory free by destructors // all OK : main returns 0 // result = rtree->predict(testing_data.row(79), cv::Mat()); // float andi = result - 
testing_labels.at<float>(79, 0); // // std::cout<<training_labels.row(0).col(0)<<std::endl; // std::cout<<andi<<std::endl; return 0; }
// MEX entry point: trains an OpenCV random forest on data passed from MATLAB.
//   prhs[0] - sample matrix, one row per sample
//   prhs[1] - target value vector (one entry per sample)
//   prhs[2] - (optional) struct of forest training options
//   plhs[0] - packed pointer to the trained CvRTrees forest
//   plhs[1] - (optional) training time in seconds
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    ASSERT_NUM_RHS_ARGS_GTE(2);
    ASSERT_NUM_LHS_ARGS_LT(3);

    const mxArray* dataMtx = prhs[0];
    const mxArray* targetValueVec = prhs[1];

    //see if we have been provided a struct containing options for the training.
    //if not, then use defaults provided by opencv
    CvRTParams* rtParams;
    if (nrhs > 2) {
        mexPrintf("Parsing struct argument for parameters\n");
        rtParams = parse_struct_to_forest_config(prhs[2]);
    }
    else {
        mexPrintf("Using default parameters\n");
        rtParams = parse_struct_to_forest_config(NULL);
    }
    mexPrintf("Parameters:\n");
    print_forest_params(rtParams);

    unsigned int numSamples, numVariables;
    CvMat* dataCvMtx = matlab_matrix_to_opencv_matrix(dataMtx);
    numSamples = dataCvMtx->rows;
    numVariables = dataCvMtx->cols;
    mexPrintf("training data converted to opencv format. %d samples, each with %d variables\n",
              numSamples, numVariables);
#ifdef PRINT_INPUTS
    print_opencv_matrix(dataCvMtx);
#endif

    CvMat* targetCvMtx = matlab_array_to_opencv_array(targetValueVec);
    if (targetCvMtx->rows != numSamples) {
        MEX_ERR_PRINTF("training data had %d samples, labels contain %d values.",
                       numSamples, targetCvMtx->rows);
    }
    mexPrintf("training labels converted to opencv format.\n");
#ifdef PRINT_INPUTS
    print_opencv_matrix(targetCvMtx);
#endif

    //specify the type of our variables. In this case, all our variables are
    //ordered: one CV_VAR_ORDERED byte per input variable plus one for the response.
    CvMat* var_type = cvCreateMat(dataCvMtx->cols + 1, 1, CV_8U);
    cvSet(var_type, cvScalarAll(CV_VAR_ORDERED));

    //actually make the forest and do the training
    clock_t start_time, end_time;
    mexPrintf("training now...");
    start_time = clock();
    CvRTrees *forest = new CvRTrees;
    forest->train(dataCvMtx, CV_ROW_SAMPLE, targetCvMtx, NULL, NULL, var_type, NULL, *rtParams);
    end_time = clock();
    clock_t diff_time = end_time - start_time;
    double seconds_passed = ((float)diff_time) / CLOCKS_PER_SEC;
    mexPrintf("training done in %fs\n", seconds_passed);

    //pack the pointer and return it to matlab; the forest intentionally
    //outlives this call (MATLAB owns it via the packed pointer)
    plhs[0] = pack_pointer((void *)forest);

    // If the user supplied a second lhs argument, return them the time taken to train
    if (nlhs > 1) {
        plhs[1] = mxCreateDoubleScalar(seconds_passed);
    }

    cvReleaseMat(&var_type);
    cvReleaseMat(&dataCvMtx);
    cvReleaseMat(&targetCvMtx);
    // NOTE(review): rtParams comes from parse_struct_to_forest_config and is
    // never freed here — confirm whether the callee allocates it on the heap.
}
// Serializes the trained forest to the given file via OpenCV's model storage.
void save(char* file) { forest.save(file); }
void evaluation(CvRTrees& forest, DataSet& data, Mat& sampleIdx, TrainResult& result) { int numTrainSamples = (cv::sum( sampleIdx ))[0]; // retrieve variable_importance result.var_importance = forest.get_var_importance(); // result.var_importance = forest.get_subtree_weights(); // cout << result.var_importance << endl; double min,max; Point minLoc,maxLoc; minMaxLoc(result.var_importance,&min,&max,&minLoc,&maxLoc); // printf("variable importance (max:%.2f%%):\n\n",max*100.f); // compute prediction error on train and test data result.train_hr = 0; result.test_hr = 0; result.fpRate = 0; result.fnRate = 0; Mat responses_new = Mat(data.numSamples,1,CV_32F,9.0); for(int i = 0; i < data.numSamples; i++ ) { double r; Mat sample = data.data.row(i); // do prediction with trained forest r = forest.predict(sample); responses_new.at<float>(i,0) = r; float respo = data.responses.at<float>(i,0); // prediction correct ? r = fabs(r - respo) <= FLT_EPSILON ? 1 : 0; if( sampleIdx.at<char>(0,i) ) result.train_hr += r; else result.test_hr += r; // false prediction, increase appropriate counter if(!r) { if(respo) result.fnRate += 1; else result.fpRate += 1; } } // cout << sampleIdx << endl; // cout << data.responses << endl; // cout << responses_new << endl; result.test_hr /= (double)(data.numSamples-numTrainSamples); result.train_hr /= (double)numTrainSamples; result.fpRate /= (double) data.numNeg; result.fnRate /= (double) data.numPos; }
/** @function main */ int main( int argc, char** argv ) { char selection; cout<<"Welcome to Plant Recognition System"<<endl; cout<<"Please select following in order to make an operation:"<<endl; cout<<"S for Segmentation and Feature Extraction of Normal Leaf Image"<<endl; cout<<"F for Feature Extraction of a Binary Image"<<endl; cout<<"C for Classification of Test Set with NN"<<endl; cout<<"T for Feature Extraction and Training of Train Set"<<endl; cout<<"Q for Extracted Features to CSV File"<<endl; cout<<"R for Classification with Random Forests"<<endl; cout<<"E for SIFT "<<endl; cout<<"Y for leaf detection test with SIFT+BoF+SVM"<<endl; cin>>selection; switch(selection) { case 'e': case 'E': { /* IplImage* input=cvLoadImage("C:/fb2.jpg", CV_LOAD_IMAGE_GRAYSCALE); vector<KeyPoint> keypoints; OutputArray descriptors; InputArray mask;*/ /* Mat input=imread("C:/fb2.jpg", CV_LOAD_IMAGE_GRAYSCALE); if( !input.data ) { cout<<"Error while loading data"<<endl; return -1; } int minHessian = 400; SurfFeatureDetector detector( minHessian ); std::vector<KeyPoint> keypoints_1; detector.detect( input, keypoints_1 ); Mat img_keypoints_1; drawKeypoints( input, keypoints_1, img_keypoints_1, Scalar::all(-1), DrawMatchesFlags::DEFAULT ); imshow("Keypoints 1", img_keypoints_1 );*/ /* const cv::Mat input = cv::imread("C:/MyPic.png", 0); //Load as grayscale cv::SiftFeatureDetector detector; std::vector<cv::KeyPoint> keypoints; detector.detect(input, keypoints); std::vector<cv::Point2f> points; std::vector<cv::KeyPoint>::iterator it; */ //cv::Mat pointMatrix(points); // Add results to image and save. 
/*cv::Mat output; cv::drawKeypoints(input, keypoints, output); cv::imwrite("C:/sift_result.jpg", output);*/ //waitKey(0); //return 0; } break; ////TRAIN VE TEST FEATURELARI CSV DOSYASI OLARAK TUTULDU (RANDOM FOREST ICIN) case 'q': case 'Q': { ConvertToCSV* myCsv=new ConvertToCSV(); myCsv->TrainFeaturesAsCSV(); myCsv->TestFeaturesAsCSV(); } break; ////SEGMENTATION WITH GRABCUT THEN FEATURE EXTRACTION case 'S': case 's': { Segment *mySegment=new Segment(); mySegment->makeSegmentation(); IplImage* segmented=cvCloneImage(mySegment->getSegmentedImage()); //cvShowImage("segmentedImage", segmented ); IplImage* converted=cvCreateImage( cvGetSize( segmented ), 8, 3 ); cvCvtColor(segmented, converted, CV_GRAY2RGB); cvShowImage("converted", converted ); ExtractDescriptorHelper* myExtract=new ExtractDescriptorHelper(); myExtract->ExtractDescriptors(converted); } break; ////FEATURE EXTRACTION OF SINGLE IMAGE case 'F': case 'f': { ImageReaderHelper* tmpReader=new ImageReaderHelper(); IplImage* src= tmpReader->readBinaryImage(); ExtractDescriptorHelper* tmpDescriptorFinder=new ExtractDescriptorHelper(); tmpDescriptorFinder->ExtractDescriptors(src); tmpDescriptorFinder->sortVector(); tmpDescriptorFinder->createFeatureVector(); } break; ////GEOMETRIC FEATURELARIN NORMALIZASYONU /* NormaliseGeoFeatures *normaliseTmp=new NormaliseGeoFeatures(); //normaliseTmp->produceFileNamesVect(); normaliseTmp->initializeMaxMin(); normaliseTmp->calcMaxMin(); normaliseTmp->normalizeGeoFeatures(); normaliseTmp->writeMaxMinToFile(); */ ////CLASSIFICATION WITH NEAREST NEIGHBOUR case 'C': case 'c': { int b; Classify *temp=new Classify(); //temp->getMinMaxFromFile(); b=temp->makeClassification(); //temp->sortDataVect(); //temp->printVector(); } break; /* int d; ClassifyIndividual *myTemp=new ClassifyIndividual(); myTemp->makeClassification(); myTemp->sortDataVect(); myTemp->printVector(); */ //FEATURE EXTRACTION AND WRITING IT TO TXT //DOSYADAN OKUMA VE FEATURELARI TXT DOSYASINA YAZMA YAPILDI, 
ÇALIŞIYOR. //C:/Deneme/ DİZİNİNDEKİ DOSYALAR İÇİN GERÇEKLENDİ, FEATURELAR YAZILIYOR.. case 'T': case 't': { int sayac=0; DIR *dir; struct dirent *ent; char folder[100]; char writeFile[100]; string write; string str1="C:/TrainSet/."; string str2="C:/TrainSet/.."; string str3="C:/TrainSet/Thumbs.db"; if ((dir = opendir ("C:/TrainSet/"))) { while ((ent = readdir (dir)) != NULL) { sprintf (folder, "C:/TrainSet/%s", ent->d_name); sprintf (writeFile, "C:/Features/%s.txt", ent->d_name); cout<<"File name:"<<folder<<endl; if(str1.compare(folder)==0 || str2.compare(folder)==0 || str3.compare(folder)==0) continue; IplImage* src; src=cvLoadImage(folder); ExtractDescriptorHelper* tmpDescriptorFinder=new ExtractDescriptorHelper(); tmpDescriptorFinder->ExtractDescriptors(src); tmpDescriptorFinder->sortVector(); tmpDescriptorFinder->createFeatureVector(); vector<double> writeVector=tmpDescriptorFinder->getMyFeatureVector(); double featureArray[featureVectSize]; free(tmpDescriptorFinder); sayac++; for(int p=0; p<featureVectSize; p++) featureArray[p]=writeVector.at(p); ofstream myfile; myfile.open (writeFile); for(int k=0; k<featureVectSize; k++) myfile << featureArray[k]<<"\n"; myfile.close(); } closedir (dir); } else { cout<<"Error exists"<<endl; perror (""); return EXIT_FAILURE; } cout<<"Sayac: "<<sayac<<endl; } break; case 'R': case 'r': { // lets just check the version first printf ("OpenCV version %s (%d.%d.%d)\n", CV_VERSION, CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); // define training data storage matrices (one for attribute examples, one // for classifications) //CV_8UC(15) , CV_32FC1 Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); //define testing data storage matrices Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); // define all the 
attributes as numerical // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) // that can be assigned on a per attribute basis Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical // this is a classification problem (i.e. predict a discrete number of class // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL var_type.at<uchar>(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; double result; // value returned from a prediction // load training and testing data sets if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) { // define the parameters for training the random forest (trees) float priors[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes // (all equal as equal samples of each digit) CvRTParams params = CvRTParams(20, // max depth 5, // min sample count 0, // regression accuracy: N/A here false, // compute surrogate split, no missing data 15, // max number of categories (use sub-optimal algorithm for larger numbers) priors, // the array of priors false, // calculate variable importance 40, // number of variables randomly selected at node and used to find the best split(s). 
100, // max number of trees in the forest 0.01f, // forrest accuracy CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria ); // train random forest classifier (using training data) printf( "\nUsing training database: %s\n\n", argv[1]); CvRTrees* rtree = new CvRTrees; rtree->train(training_data, CV_ROW_SAMPLE, training_classifications, Mat(), Mat(), var_type, Mat(), params); // perform classifier testing and report results Mat test_sample; int correct_class = 0; int wrong_class = 0; int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; printf( "\nUsing testing database: %s\n\n", argv[2]); for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) { // extract a row from the testing matrix test_sample = testing_data.row(tsample); // run random forest prediction result = rtree->predict(test_sample, Mat()); printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); // if the prediction and the (true) testing classification are the same // (N.B. openCV uses a floating point decision tree implementation!) 
if (fabs(result - testing_classifications.at<float>(tsample, 0)) >= FLT_EPSILON) { // if they differ more than floating point error => wrong class wrong_class++; false_positives[(int) result]++; } else { // otherwise correct correct_class++; } } printf( "\nResults on the testing database: %s\n" "\tCorrect classification: %d (%g%%)\n" "\tWrong classifications: %d (%g%%)\n", argv[2], correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); for (int i = 0; i < NUMBER_OF_CLASSES; i++) { printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, false_positives[i], (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); } // all matrix memory free by destructors // all OK : main returns 0 return 0; } // not OK : main returns -1 return -1; } break; case 'Y': case 'y': { isLeaf *myLeafTmp=new isLeaf(); //myLeafTmp->produceDictionary(); *** herhangi 2sini veya 3unu aynı anda çalıştırma. myLeafTmp->produceTrainData(); // everything here. //myLeafTmp->produceTestData(); //myLeafTmp->isLeafOrNot(); crashes } break; } waitKey(0); return 0; };
// Brain-tumor segmentation demo on the BRATS_HG0005 volume: loads FLAIR/T1/
// T1C/T2 slice images plus ground truth for slices START..END, derives LBP
// texture images, trains a small CvRTrees classifier on per-pixel features,
// then predicts every pixel of every slice and writes binary result PNGs.
int main() {
    //----------------------------------- read images ------------------------
    // imgs[slice][0..3] = FLAIR/T1/T1C/T2 greyscale, imgs[slice][4] = truth;
    // tex_imgs[slice][0..3] = the matching LBP texture images.
    IplImage*** imgs = new IplImage**[END - START + 1];
    IplImage*** tex_imgs = new IplImage**[END - START + 1];
    for (int i = 0; i < END - START + 1; i++) {
        imgs[i] = new IplImage*[5];
        tex_imgs[i] = new IplImage*[4];
    }
    for (int i = 0; i < END - START + 1; i++) {
        for (int j = 0; j < 4; j++) {
            imgs[i][j] = NULL;
            tex_imgs[i][j] = NULL;
        }
        imgs[i][4] = NULL;
    }
    //----------------------------------------
    cout << "read image..........." << endl;
    for (int i = START; i <= END; i++) {
        char flairname[100], t1name[100], t1cname[100], t2name[100], truthname[100];
        memset(flairname, 0, 100);
        memset(t1name, 0, 100);
        memset(t1cname, 0, 100);
        memset(t2name, 0, 100);
        memset(truthname, 0, 100);
        sprintf(flairname, "BRATS_HG0005_FLAIR/BRATS_HG0005_FLAIR_%d.png", i);
        sprintf(t1name, "BRATS_HG0005_T1/BRATS_HG0005_T1_%d.png", i);
        sprintf(t1cname, "BRATS_HG0005_T1C/BRATS_HG0005_T1C_%d.png", i);
        sprintf(t2name, "BRATS_HG0005_T2/BRATS_HG0005_T2_%d.png", i);
        sprintf(truthname, "BRATS_HG0005_truth/BRATS_HG0005_truth_%d.png", i);
        IplImage* flair_img = RGB2GRAY(cvLoadImage(flairname));
        IplImage* t1_img = RGB2GRAY(cvLoadImage(t1name));
        IplImage* t1c_img = RGB2GRAY(cvLoadImage(t1cname));
        IplImage* t2_img = RGB2GRAY(cvLoadImage(t2name));
        IplImage* truth_img = RGB2GRAY(cvLoadImage(truthname));
        imgs[i - START][0] = flair_img;
        imgs[i - START][1] = t1_img;
        imgs[i - START][2] = t1c_img;
        imgs[i - START][3] = t2_img;
        imgs[i - START][4] = truth_img;
        // Compute the LBP texture image for each modality.
        IplImage* flair_tex = cvCreateImage(cvGetSize(flair_img), IPL_DEPTH_8U, 1);
        IplImage* t1_tex = cvCreateImage(cvGetSize(t1_img), IPL_DEPTH_8U, 1);
        IplImage* t1c_tex = cvCreateImage(cvGetSize(t1c_img), IPL_DEPTH_8U, 1);
        IplImage* t2_tex = cvCreateImage(cvGetSize(t2_img), IPL_DEPTH_8U, 1);
        LBP(flair_img, flair_tex);
        LBP(t1_img, t1_tex);
        LBP(t1c_img, t1c_tex);
        LBP(t2_img, t2_tex);
        tex_imgs[i - START][0] = flair_tex;
        tex_imgs[i - START][1] =t1_tex;
        tex_imgs[i - START][2] = t1c_tex;
        tex_imgs[i - START][3] = t2_tex;
    }
    //----------------------------------------------------------
    cout << "read training data............" << endl;
    // Worst-case capacity: one row per pixel per slice.
    Mat train_datas(HEIGHT*WIDTH*(END - START + 1), ATTRIBUTES_PER_SAMPLE, CV_32FC1);
    Mat responses(HEIGHT*WIDTH*(END - START + 1), 1, CV_32SC1);
    // Read the training data; returns the number of rows actually filled.
    int dataline=read_training_data(imgs,tex_imgs, train_datas, responses);
    Mat _train_datas(dataline, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
    Mat _responses(dataline, 1, CV_32SC1);
    // Shrink the training data down to the `dataline` rows that were filled.
    for (int i = 0; i < dataline; i++) {
        float* float_data = train_datas.ptr<float>(i);
        int* int_data = responses.ptr<int>(i);
        _train_datas.at<float>(i, 0) = float_data[0];
        _train_datas.at<float>(i, 1) = float_data[1];
        _train_datas.at<float>(i, 2) = float_data[2];
        _train_datas.at<float>(i, 3) = float_data[3];
        _train_datas.at<float>(i, 4) = float_data[4];
        _train_datas.at<float>(i, 5) = float_data[5];
        _train_datas.at<float>(i, 6) = float_data[6];
        _train_datas.at<float>(i, 7) = float_data[7];
        _train_datas.at<float>(i, 8) = float_data[8];
        _responses.at<int>(i, 0) = int_data[0];
    }
    //---- declare variable types: all attributes numerical, response categorical ---
    Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE+1, 1, CV_8U);
    var_type.setTo(Scalar(CV_VAR_NUMERICAL)); // all inputs are numerical
    var_type.at<uchar>(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL;
    //--- train the random forest ---
    cout << "training......." << endl;
    float priors[NUMBER_OF_CLASSES] = { 1, 1 }; // equal class weights
    CvRTParams params = CvRTParams(25, // max depth
        4, // min sample count
        0, // regression accuracy: N/A here
        false, // compute surrogate split, no missing data
        5, // max number of categories (use sub-optimal algorithm for larger numbers)
        priors, // the array of priors
        false, // calculate variable importance
        3, // number of variables randomly selected at node and used to find the best split(s).
        3, // max number of trees in the forest
        0.01f, // forest accuracy
        CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination criteria
        );
    CvRTrees* rtree = new CvRTrees;
    bool train_result = rtree->train(_train_datas, CV_ROW_SAMPLE, _responses,
        Mat(), Mat(), var_type, Mat(), params);
    if (train_result == false)
        cout << "random trees train failed!" << endl;
    cout << "predicting.........." << endl;
    //------- predict every pixel of every slice and save result images ---------
    for (int k = 0; k < END - START + 1; k++) {
        IplImage* img_dst = cvCreateImage(cvGetSize(imgs[k][0]), IPL_DEPTH_8U, 1);
        uchar* ptr;
        for (int i = 0; i <HEIGHT ; i++) {
            ptr = (uchar*)img_dst->imageData + i*img_dst->widthStep;
            for (int j = 0; j < WIDTH; j++) {
                // Build one sample row: 4 intensities, 4 LBP textures, slice index.
                Mat test_data(1, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
                test_data.at<float>(0, 0) = cvGet2D(imgs[k][0], i, j).val[0];
                test_data.at<float>(0, 1) = cvGet2D(imgs[k][1], i, j).val[0];
                test_data.at<float>(0, 2) = cvGet2D(imgs[k][2], i, j).val[0];
                test_data.at<float>(0, 3) = cvGet2D(imgs[k][3], i, j).val[0];
                test_data.at<float>(0, 4) = cvGet2D(tex_imgs[k][0], i, j).val[0];
                test_data.at<float>(0, 5) = cvGet2D(tex_imgs[k][1], i, j).val[0];
                test_data.at<float>(0, 6) = cvGet2D(tex_imgs[k][2], i, j).val[0];
                test_data.at<float>(0, 7) = cvGet2D(tex_imgs[k][3], i, j).val[0];
                test_data.at<float>(0, 8) = k;
                // Predict the class and map it to a 0/255 mask pixel.
                int result = rtree->predict(test_data, Mat());
                *(ptr + j) = result * 255;
            }
        }
        // Clean up small speckles in the mask with an elliptical erosion.
        IplConvKernel* strel = cvCreateStructuringElementEx(5, 5, 2, 2, CV_SHAPE_ELLIPSE);
        cvErode(img_dst, img_dst, strel, 1);
        //cvDilate(img_dst, img_dst, strel, 1);
        cout << "save image " << k + START << endl;
        char result_name[100];
        memset(result_name, 0, 100);
        sprintf(result_name, "BRATS_HG0005_RESULT/BRATS_HG0005_RESULT_%d.png", k+START);
        cvSaveImage(result_name, img_dst);
    }
    // NOTE(review): the loaded images, texture images, img_dst, strel and rtree
    // are never released — acceptable for a run-once demo, but worth fixing.
    cout << "complete!!!" << endl;
    cvWaitKey(0);
}
// Run nFold-fold cross-validation of the file-global `forest` (trained with
// the file-global `forestParams`) on `data`, accumulating averaged hit rates,
// FP/FN rates and variable importance into `result`.
// Assumes data rows are ordered negatives-first then positives — the fold
// masks below rely on that layout; TODO confirm with the data loader.
void doCrossValidation( DataSet& data, TrainResult& result)
{
    // these vars not needed - use empty Mat
    Mat varIdx, missingDataMask;
    // BoostParams forestParams = cv::BoostParams(cv::Boost::DEFAULT, 100, 0.95, 5, false, 0 );
    Mat sampleIdx;
    int nFold = 5;

    result.train_hr = 0; result.test_hr = 0;
    result.fpRate = 0; result.fnRate = 0;
    // printf( "numSamples %d", data.numSamples);

    // define training/test-sets within trainData
    for(int round = 0; round < nFold; round++)
    {
        // Build the fold mask: start with all-ones (train everything), then
        // zero out this round's negative slice and positive slice as the
        // held-out test set.
        float partTrain = 1.0/nFold;
        sampleIdx = Mat(1,data.numSamples,CV_8U,1.0);
        int negIdx = (int)floor(partTrain*data.numNeg);
        sampleIdx.colRange(negIdx*round, negIdx*(round+1)) = 0.0;
        int posIdx = (int)floor( partTrain*data.numPos );
        sampleIdx.colRange( data.numNeg+posIdx*round, data.numNeg + posIdx*(round+1)) = 0.0;
        //int numT = (cv::sum( sampleIdx ))[0];
        //printf("sample Idx sum (trainsamples): %d\n",numT);
        int numTestSamples = negIdx + posIdx;
        printf("numSamples: %d -- numTrainSamples: %d -- numTestSamples: %d\n",
               data.numSamples, data.numSamples-numTestSamples, numTestSamples );

        // training (file-global forest and forestParams)
        forest.train(data.data, CV_ROW_SAMPLE, data.responses, varIdx, sampleIdx,
                     data.varType, missingDataMask, forestParams);

        // evaluation of this fold; accumulate into the overall result
        TrainResult roundResult;
        evaluation(forest, data, sampleIdx, roundResult);
        result.fnRate += roundResult.fnRate;
        result.fpRate += roundResult.fpRate;
        result.test_hr += roundResult.test_hr;
        result.train_hr += roundResult.train_hr;
        // clone on the first round so later += does not write into the
        // forest's own importance buffer
        if( round == 0 )
            result.var_importance = roundResult.var_importance.clone();
        else
            result.var_importance += roundResult.var_importance;

        printf( "Round %d.Recognition rate: train = %.2f%%, test = %.2f%% -- overall FN = %.2f%%, FP = %.2f%%\n",
                round, roundResult.train_hr*100., roundResult.test_hr*100.
                ,roundResult.fnRate*100. ,roundResult.fpRate*100.);
    }

    // Average the accumulated metrics over all folds, and renormalise the
    // summed variable importance so it sums to 1.
    result.fnRate /= nFold;
    result.fpRate /= nFold;
    result.test_hr /= nFold;
    result.train_hr /= nFold;
    result.var_importance /= nFold;
    double sum = (cv::sum(result.var_importance))[0];
    result.var_importance /= sum;

    printf( "____\nRecognition rate: train = %.2f%%, test = %.2f%% -- overall FN = %.2f%%, FP = %.2f%%\n",
            result.train_hr*100., result.test_hr*100.
            ,result.fnRate*100. ,result.fpRate*100.);
}
// ROOT + OpenCV analysis: trains a CvRTrees classifier to separate "good"
// muon tracks (ntuple 0) from "bad" ones (ntuple 1) using four track
// features (ptR, etaR, phiR, foundR), then evaluates predict_prob on the
// remaining events, prints error rates at a 0.5 cut, and writes probability
// histograms, a ROC curve and per-feature histograms to canvas.root.
Int_t main() {
    // Access ntuples: index 0 = good-track sample, index 1 = bad-track sample.
    TFile* file[2];
    file[0] = new TFile("~/SingleMuon_pT_501_500.root");
    file[1] = new TFile("~/SingleMuon_pT_200_150.root");
    TTree* tree[2];
    tree[0] = (TTree*)file[0]->Get("trees");
    tree[1] = (TTree*)file[1]->Get("trees");

    // Declare variables and set branch addresses (one slot per ntuple).
    Double_t ptR[2] = {0, 0}, ptER[2] = {0, 0}, chi2R[2] = {0, 0}, d0R[2] = {0, 0},
             dXYR[2] = {0, 0}, dZR[2] = {0, 0}, d0ER[2] = {0, 0}, etaR[2] = {0, 0},
             etaER[2] = {0, 0}, phiR[2] = {0, 0}, resXR[2] = {0, 0}, resYR[2] = {0, 0};
    Double_t ptG[2] = {0, 0}, etaG[2] = {0, 0}, phiG[2] = {0, 0};
    Double_t globalTrkX[2] = {0, 0}, globalTrkY[2] = {0, 0}, globalTrkZ[2] = {0, 0},
             hitPosX[2] = {0, 0}, hitPosY[2] = {0, 0}, hitPosZ[2] = {0, 0},
             transImpPar4RecHits[2] = {0, 0};
    Int_t foundR[2] = {0, 0}, lostR[2] = {0, 0}, ndofR[2] = {0, 0}, idG[2] = {0, 0},
          eventN[2] = {0, 0}, nRepeats[2] = {0, 0}, nMuons[2] = {0, 0};
    Int_t muonHits[2] = {0, 0}, dtHits[2] = {0, 0}, cscHits[2] = {0, 0}, rpcHits[2] = {0, 0}; // Muon hits
    Int_t pixelHits[2] = {0, 0}, barrelHits[2] = {0, 0}, endcapHits[2] = {0, 0}; // Pixel hits
    Int_t stripHits[2] = {0, 0}, tibHits[2] = {0, 0}, tidHits[2] = {0, 0},
          tobHits[2] = {0, 0}, tecHits[2] = {0, 0}; // Strip hits
    Bool_t hQualR[2] = {0, 0}, repeatFlag[2] = {0, 0}; // Missing folder items in ntuple??
    Long64_t event[2];
    for (Int_t t = 0; t < 2; t++) {
        event[t] = tree[t]->GetEntries();
        tree[t]->SetBranchAddress("ptR", &ptR[t]);
        tree[t]->SetBranchAddress("ptER", &ptER[t]);
        tree[t]->SetBranchAddress("chi2R", &chi2R[t]);
        tree[t]->SetBranchAddress("d0R", &d0R[t]);
        tree[t]->SetBranchAddress("dXYR", &dXYR[t]);
        tree[t]->SetBranchAddress("dZR", &dZR[t]);
        tree[t]->SetBranchAddress("d0ER", &d0ER[t]);
        tree[t]->SetBranchAddress("foundR", &foundR[t]);
        tree[t]->SetBranchAddress("lostR", &lostR[t]);
        tree[t]->SetBranchAddress("etaR", &etaR[t]);
        tree[t]->SetBranchAddress("etaER", &etaER[t]);
        tree[t]->SetBranchAddress("phiR", &phiR[t]);
        tree[t]->SetBranchAddress("hQualR", &hQualR[t]);
        tree[t]->SetBranchAddress("ndofR", &ndofR[t]);
        tree[t]->SetBranchAddress("ptG", &ptG[t]);
        tree[t]->SetBranchAddress("etaG", &etaG[t]);
        tree[t]->SetBranchAddress("phiG", &phiG[t]);
        tree[t]->SetBranchAddress("idG", &idG[t]);
        tree[t]->SetBranchAddress("residualXR", &resXR[t]);
        tree[t]->SetBranchAddress("residualYR", &resYR[t]);
        tree[t]->SetBranchAddress("globalTrkX", &globalTrkX[t]);
        tree[t]->SetBranchAddress("globalTrkY", &globalTrkY[t]);
        tree[t]->SetBranchAddress("globalTrkZ", &globalTrkZ[t]);
        tree[t]->SetBranchAddress("numberOfValidMuonHits", &muonHits[t]);
        tree[t]->SetBranchAddress("numberOfValidPixelHits", &pixelHits[t]);
        tree[t]->SetBranchAddress("numberOfValidPixelBarrelHits", &barrelHits[t]);
        tree[t]->SetBranchAddress("numberOfValidPixelEndcapHits", &endcapHits[t]);
        tree[t]->SetBranchAddress("numberOfValidStripHits", &stripHits[t]);
        tree[t]->SetBranchAddress("numberOfValidStripTIBHits", &tibHits[t]);
        tree[t]->SetBranchAddress("numberOfValidStripTIDHits", &tidHits[t]);
        tree[t]->SetBranchAddress("numberOfValidStripTOBHits", &tobHits[t]);
        tree[t]->SetBranchAddress("numberOfValidStripTECHits", &tecHits[t]);
        tree[t]->SetBranchAddress("numberOfValidMuonDTHits", &dtHits[t]);
        tree[t]->SetBranchAddress("numberOfValidMuonCSCHits", &cscHits[t]);
        tree[t]->SetBranchAddress("numberOfValidMuonRPCHits", &rpcHits[t]);
        tree[t]->SetBranchAddress("eventN", &eventN[t]);
        tree[t]->SetBranchAddress("repeatFlag", &repeatFlag[t]);
        tree[t]->SetBranchAddress("numbRepeats", &nRepeats[t]);
        tree[t]->SetBranchAddress("numbMuons", &nMuons[t]);
        tree[t]->SetBranchAddress("hitPosX", &hitPosX[t]);
        tree[t]->SetBranchAddress("hitPosY", &hitPosY[t]);
        tree[t]->SetBranchAddress("hitPosZ", &hitPosZ[t]);
        tree[t]->SetBranchAddress("transImpPar4RecHits", &transImpPar4RecHits[t]);
    }

    // Forest parameters
    const Int_t VARS = 4;
    Int_t train_good = 200; // out of 791
    Int_t train_bad = 200; // out of 678
    Int_t max_trees = 50;
    Int_t max_depth = 15;
    Int_t nactive_vars = 0; // 0 for sqrt(VARS)
    Int_t min_sample_count = 10;
    Float_t regression_accuracy = 0;
    Bool_t use_surrogates = false;
    Int_t max_categories = 2;
    Float_t priors[] = {1., 1.};
    Bool_t calc_var_importance = false;
    Float_t forest_accuracy = 0.01;
    Int_t termcrit_type = CV_TERMCRIT_ITER; // CV_TERMCRIT_EPS or ITER

    // Create canvases
    TCanvas* c1 = new TCanvas("c1", "Histogram and ROC curve", 1280, 480);
    c1->Divide(2, 1);
    c1->SetGrid();
    c1->SetLogx();
    c1->SetLogy();
    TCanvas* c2 = new TCanvas("c2", "Feature histograms", 1280, 720);
    c2->Divide(2, 2);
    TFile* canvas = new TFile("canvas.root", "RECREATE");
    TLegend* legend = new TLegend(0.11, 0.7, 0.4, 0.89);

    // Create histogram and graph arrays
    const Int_t BINS = 101;
    Double_t xmin = -0.01;
    Double_t xmax = 1.01;
    TH1D* hist[2];
    TGraph* graph;
    hist[0] = new TH1D("h0", "Good tracks", BINS, xmin, xmax);
    hist[1] = new TH1D("h1", "Bad tracks", BINS, xmin, xmax);
    TH1D* featH[2][4];

    // Blank placeholder histogram for graph axes
    TH2D* blank = new TH2D("g1", "ROC curve", 2, 0.3, 1.1, 2, 0.001, 1.1);
    blank->GetXaxis()->SetTitle("good efficiency");
    blank->GetYaxis()->SetTitle("bad efficiency");
    blank->SetStats(0);
    // theRNG().state = getTickCount();

    // Create training array: first train_good good events (response 0),
    // then train_bad bad events (response 1).
    const Int_t TRAIN = train_good + train_bad;
    Int_t resp[TRAIN];
    Float_t train[TRAIN][VARS];
    for (Int_t i = 0; i < train_good; i++) {
        tree[0]->GetEvent(i);
        resp[i] = 0;
        // train[i][0] = tibHits[0];
        // train[i][1] = tidHits[0];
        // train[i][2] = tobHits[0];
        // train[i][3] = tecHits[0];
        train[i][0] = ptR[0];
        train[i][1] = etaR[0];
        train[i][2] = phiR[0];
        train[i][3] = foundR[0];
    }
    for (Int_t i = 0; i < train_bad; i++) {
        tree[1]->GetEvent(i);
        resp[i + train_good] = 1;
        // train[i + train_good][0] = tibHits[1];
        // train[i + train_good][1] = tidHits[1];
        // train[i + train_good][2] = tobHits[1];
        // train[i + train_good][3] = tecHits[1];
        train[i + train_good][0] = ptR[1];
        train[i + train_good][1] = etaR[1];
        train[i + train_good][2] = phiR[1];
        train[i + train_good][3] = foundR[1];
    }
    Int_t tflag = CV_ROW_SAMPLE;
    // Wrap the stack arrays without copying.
    Mat responses(TRAIN, 1, CV_32SC1, resp);
    Mat train_data(TRAIN, VARS, CV_32FC1, train);
    /* for (Int_t i = 0; i < TRAIN; i++) { for (Int_t j = 0; j < VARS; j++) cout << train[i][j] << "\t"; cout << endl; } */

    // Create type mask: all predictors ordered, response categorical.
    Int_t var[VARS + 1];
    for (Int_t i = 0; i < VARS; i++)
        var[i] = CV_VAR_ORDERED;
    var[VARS] = CV_VAR_CATEGORICAL;
    Mat var_type(VARS + 1, 1, CV_32SC1, var);
    var_type.convertTo(var_type, CV_8SC1); // Convert to 8-bit ints

    // Create missing data mask (all zero: nothing missing).
    Int_t miss_t[TRAIN][VARS];
    for (Int_t i = 0; i < TRAIN; i++) {
        for (Int_t j = 0; j < VARS; j++)
            miss_t[i][j] = 0;
    }
    Mat missing_data_mask(TRAIN, VARS, CV_32SC1, miss_t);
    missing_data_mask.convertTo(missing_data_mask, CV_8UC1);

    // Create indices: use every variable and every sample.
    Mat var_idx = Mat::ones(VARS, 1, CV_8UC1);
    Mat sample_idx = Mat::ones(TRAIN, 1, CV_8UC1);

    // Train forest, print variable importance (if used)
    cout << "Trees: " << max_trees << endl;
    cout << "Depth: " << max_depth << endl;
    cout << "m: " << nactive_vars << endl;
    CvRTrees forest;
    forest.train(train_data, tflag, responses, var_idx, sample_idx, var_type, missing_data_mask,
                 CvRTParams(max_depth, min_sample_count, regression_accuracy, use_surrogates,
                            max_categories, priors, calc_var_importance, nactive_vars,
                            max_trees, forest_accuracy, termcrit_type));
    if (calc_var_importance) {
        Mat imp = forest.getVarImportance();
        cout << endl << imp << endl << endl;
    }

    // Create solving array and data mask from the events not used in training.
    Int_t solve_good = event[0] - train_good;
    Int_t solve_bad = event[1] - train_bad;
    const Int_t SOLVE = solve_good + solve_bad;
    Int_t flag[SOLVE]; // true class per solving event: 0 = good, 1 = bad
    Float_t solve[SOLVE][VARS];
    for (Int_t i = 0; i < solve_good; i++) {
        tree[0]->GetEvent(i + train_good);
        flag[i] = 0;
        // solve[i][0] = tibHits[0];
        // solve[i][1] = tidHits[1];
        // solve[i][2] = tobHits[1];
        // solve[i][3] = tecHits[0];
        solve[i][0] = ptR[0];
        solve[i][1] = etaR[0];
        solve[i][2] = phiR[0];
        solve[i][3] = foundR[0];
    }
    for (Int_t i = 0; i < solve_bad; i++) {
        tree[1]->GetEvent(i + train_bad);
        flag[i + solve_good] = 1;
        // solve[i + solve_good][0] = tibHits[1];
        // solve[i + solve_good][1] = tidHits[1];
        // solve[i + solve_good][2] = tobHits[1];
        // solve[i + solve_good][3] = tecHits[1];
        solve[i + solve_good][0] = ptR[1];
        solve[i + solve_good][1] = etaR[1];
        solve[i + solve_good][2] = phiR[1];
        solve[i + solve_good][3] = foundR[1];
    }
    Int_t miss_s[SOLVE][VARS];
    for (Int_t i = 0; i < SOLVE; i++) {
        for (Int_t j = 0; j < VARS; j++)
            miss_s[i][j] = 0;
    }
    /* for (Int_t i = 0; i < SOLVE; i++) { for (Int_t j = 0; j < VARS; j++) cout << solve[i][j] << "\t"; cout << endl; } */

    // Split solving data into 1d matrices and process each;
    // predict_prob returns the probability of the "1" (bad) class.
    Float_t prediction[SOLVE];
    for (Int_t i = 0; i < SOLVE; i++) {
        Mat sample(VARS, 1, CV_32FC1, solve[i]);
        Mat missing(VARS, 1, CV_32SC1, miss_s[i]);
        missing.convertTo(missing, CV_8UC1);
        prediction[i] = forest.predict_prob(sample, missing);
    }

    // Create and fill histogram.
    // NOTE(review): flag[i]==1 (bad tracks) fills hist[0], which is titled
    // "Good tracks" — verify the intended orientation against the ROC axes.
    for (Int_t i = 0; i < SOLVE; i++) {
        if (flag[i])
            hist[0]->Fill(prediction[i]);
        else
            hist[1]->Fill(prediction[i]);
    }

    // Calculate errors at specificity 0.5
    Double_t errors[4] = {0, 0, 0, 0}; // True pos, false pos, true neg, false neg
    for (Int_t i = 0; i < SOLVE; i++) {
        if (prediction[i] > 0.5) {
            if (flag[i])
                errors[0]++;
            else
                errors[1]++;
        }
        else {
            if (!flag[i])
                errors[2]++;
            else
                errors[3]++;
        }
    }
    Double_t perc_errors[] = {0, 0, 0, 0, 0};
    perc_errors[0] = errors[0] / (errors[0] + errors[1]) * 100; // True pos
    perc_errors[1] = errors[1] / (errors[0] + errors[1]) * 100; // False pos
    perc_errors[2] = errors[2] / (errors[2] + errors[3]) * 100; // True neg
    perc_errors[3] = errors[3] / (errors[2] + errors[3]) * 100; // False neg
    perc_errors[4] = (errors[1] + errors[3]) / (errors[0] + errors[1] + errors[2] + errors[3]) * 100;
    cout << "For specificity 0.5:" << endl;
    cout << "True bad tracks: " << perc_errors[0] << "%" << endl;
    cout << "False bad tracks: " << perc_errors[1] << "%" << endl;
    cout << "True good tracks: " << perc_errors[2] << "%" << endl;
    cout << "False good tracks: " << perc_errors[3] << "%" << endl;
    cout << "Combined errors: " << perc_errors[4] << "%" << endl << endl;

    // Draw histograms
    c1->cd(1);
    // hist[0]->SetLineColor(color[a]);
    hist[0]->SetStats(0);
    hist[0]->Draw();
    hist[1]->SetLineColor(2);
    hist[1]->SetStats(0);
    hist[1]->Draw("SAME");

    // Make ROC curve from the cumulative distributions of the two histograms.
    Double_t eff[2][BINS];
    for (Int_t b = 0; b < BINS; b++) {
        eff[0][b] = hist[0]->Integral(1, b + 1) / hist[0]->Integral();
        eff[1][b] = hist[1]->Integral(1, b + 1) / hist[1]->Integral();
    }
    c1->cd(2);
    blank->Draw();
    graph = new TGraph(BINS, eff[1], eff[0]);
    // graph->SetLineColor(color[a]);
    graph->Draw("L");
    // legend->AddEntry(graph, title[a].c_str(), "LPF");
    // legend->Draw();

    // Create feature histograms (good vs bad overlay for each input feature).
    featH[0][0] = new TH1D("feat 0 0", "ptR", 50, 0, 650);
    featH[1][0] = new TH1D("feat 1 0", "ptR", 50, 0, 650);
    featH[0][1] = new TH1D("feat 0 1", "etaR", 25, -0.1, 0.1);
    featH[1][1] = new TH1D("feat 1 1", "etaR", 25, -0.1, 0.1);
    featH[0][2] = new TH1D("feat 0 2", "phiR", 25, -0.1, 0.1);
    featH[1][2] = new TH1D("feat 1 2", "phiR", 25, -0.1, 0.1);
    featH[0][3] = new TH1D("feat 0 3", "foundR", 25, 0, 25);
    featH[1][3] = new TH1D("feat 1 3", "foundR", 25, 0, 25);
    for (Int_t h = 0; h < 2; h++) {
        for (Int_t i = 0; i < event[h]; i++) {
            tree[h]->GetEvent(i);
            featH[h][0]->Fill(ptR[h]);
            featH[h][1]->Fill(etaR[h]);
            featH[h][2]->Fill(phiR[h]);
            featH[h][3]->Fill(foundR[h]);
        }
    }
    for (Int_t f = 0; f < 4; f++) {
        c2->cd(f + 1);
        featH[0][f]->Draw();
        featH[1][f]->SetLineColor(kRed);
        featH[1][f]->Draw("SAME");
    }

    // Persist both canvases into canvas.root and finish.
    c1->Write();
    c2->Write();
    canvas->Close();
    return 0;
}
int main( int argc, char** argv ) { // lets just check the version first printf ("OpenCV version %s (%d.%d.%d)\n", CV_VERSION, CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); if(argc != 4) { printf("Usage: %s file_training file_testing number_of_classes", argv[0]); exit(0); } //define number of training and testing samples and number of attributes int* results = find_parameters_from_csv(argv[1], argv[2]); int NUMBER_OF_TRAINING_SAMPLES = results[0] - 1; int NUMBER_OF_TESTING_SAMPLES = results[1] -1 ; int ATTRIBUTES_PER_SAMPLE = results[2]; int NUMBER_OF_CLASSES = atoi(argv[3]); printf("N° of training samples: %d \nN° testing of samples: %d \nN° of attributes: %d \nN° of classes: %d \n", NUMBER_OF_TRAINING_SAMPLES,NUMBER_OF_TESTING_SAMPLES,ATTRIBUTES_PER_SAMPLE,NUMBER_OF_CLASSES ); // define training data storage matrices (one for attribute examples, one // for classifications) Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); //define testing data storage matrices Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); // define all the attributes as numerical // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) // that can be assigned on a per attribute basis Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical // this is a classification problem (i.e. 
predict a discrete number of class // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL var_type.at<uchar>(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; double result; // value returned from a prediction // load training and testing data sets if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE) && read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE)) { // define the parameters for training the random forest (trees) // weights of each classification for classes // (all equal as equal samples of each digit) float priors[NUMBER_OF_CLASSES]; for (int z = 0; z < NUMBER_OF_CLASSES; z++) { priors[z] = 1; } //dà peso 1 a ciascuna classe all'inizio CvRTParams params = CvRTParams(25, // max depth 2, // min sample count 0, // regression accuracy: N/A here false, // compute surrogate split, no missing data 15, // max number of categories (use sub-optimal algorithm for larger numbers) priors, // the array of priors false, // calculate variable importance 4, // number of variables randomly selected at node and used to find the best split(s). 
100, // max number of trees in the forest 0.01f, // forrest accuracy CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria ); // train random forest classifier (using training data) printf( "\nUsing training database: %s\n\n", argv[1]); CvRTrees* rtree = new CvRTrees; rtree->train(training_data, CV_ROW_SAMPLE, training_classifications, Mat(), Mat(), var_type, Mat(), params); // perform classifier testing and report results Mat test_sample; int correct_class = 0; int wrong_class = 0; int false_positives [NUMBER_OF_CLASSES]; //initialize every element in false_positives to 0 for (int z = 0; z < NUMBER_OF_CLASSES; z++) { false_positives[z] = 0; } printf( "\nUsing testing database: %s\n\n", argv[2]); for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) { // extract a row from the testing matrix test_sample = testing_data.row(tsample); // run random forest prediction result = rtree->predict(test_sample, Mat()); printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); // if the prediction and the (true) testing classification are the same // (N.B. openCV uses a floating point decision tree implementation!) if (fabs(result - testing_classifications.at<float>(tsample, 0)) >= FLT_EPSILON) { // if they differ more than floating point error => wrong class wrong_class++; false_positives[(int) result]++; } else { // otherwise correct correct_class++; } } printf( "\nResults on the testing database: %s\n" "\tCorrect classification: %d (%g%%)\n" "\tWrong classifications: %d (%g%%)\n", argv[2], correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); for (int i = 0; i < NUMBER_OF_CLASSES; i++) { printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, false_positives[i], (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); } // all matrix memory free by destructors // all OK : main returns 0 return 0; } // not OK : main returns -1 return -1; }