Example #1
void Model::Train_rtrees( const SampleSet& samples )
{
	CvRTrees* model = (CvRTrees*)m_pModel;
	CvRTParams* para = (CvRTParams*)m_trainPara;
	model->train(samples.Samples(), CV_ROW_SAMPLE, samples.Labels(), 
		cv::Mat(), cv::Mat(), cv::Mat(), cv::Mat(), *para);
}
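
Train_rtrees assumes that m_trainPara already points at a configured CvRTParams object. A minimal sketch of how that member might be populated elsewhere in the class (the initializer function and the concrete values are assumptions, not part of the original; only the CvRTParams constructor signature is OpenCV's):

void Model::Init_rtrees_params()
{
	// Hypothetical setup for the m_trainPara member used by Train_rtrees.
	m_trainPara = new CvRTParams(
		10,     // max_depth
		2,      // min_sample_count
		0,      // regression_accuracy (N/A for classification)
		false,  // use_surrogates
		16,     // max_categories
		0,      // priors
		false,  // calc_var_importance
		0,      // nactive_vars (0 = sqrt(number of features))
		100,    // max number of trees in the forest
		0.01f,  // forest accuracy
		CV_TERMCRIT_ITER | CV_TERMCRIT_EPS); // termination criteria
}
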
Example #2
File: tree_engine.cpp Project: glo/ee384b
int main()
{
    const int train_sample_count = 300;

//#define LEPIOTA
#ifdef LEPIOTA
    const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data";
#else
    const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data";
#endif

    CvDTree dtree;
    CvBoost boost;
    CvRTrees rtrees;
    CvERTrees ertrees;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    data.read_csv( filename );

#ifdef LEPIOTA
    data.set_response_idx( 0 );     
#else
    data.set_response_idx( 21 );     
    data.change_var_type( 21, CV_VAR_CATEGORICAL );
#endif

    data.set_train_test_split( &spl );
    
    printf("======DTREE=====\n");
    dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() );

#ifdef LEPIOTA
    printf("======BOOST=====\n");
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 );
#endif

    printf("======RTREES=====\n");
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() );

    return 0;
}
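
This example relies on a print_result() helper that is not shown. A minimal sketch of what it might look like (the exact output formatting is an assumption):

static void print_result( float train_err, float test_err, const CvMat* var_imp )
{
    printf( "train error: %f\n", train_err );
    printf( "test error:  %f\n", test_err );
    // var_imp may be 0 for models that do not compute variable importance
    if( var_imp )
        printf( "%d variables have importance values\n", var_imp->cols );
    printf( "\n" );
}
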
Example #3
  void test()
  {
    CvMat* tmp_mat = cvCreateMat(1, feat_count_, CV_32FC1);

    int pos_right = 0;
    int pos_total = 0;
    for (vector< vector<float> >::iterator i = pos_data_.begin();
         i != pos_data_.end();
         i++)
    {
      for (int k = 0; k < feat_count_; k++)
        tmp_mat->data.fl[k] = (float)((*i)[k]);
      if (forest.predict(tmp_mat) > 0)
        pos_right++;
      pos_total++;
    }

    int neg_right = 0;
    int neg_total = 0;
    for (vector< vector<float> >::iterator i = neg_data_.begin();
         i != neg_data_.end();
         i++)
    {
      for (int k = 0; k < feat_count_; k++)
        tmp_mat->data.fl[k] = (float)((*i)[k]);
      if (forest.predict(tmp_mat) < 0)
        neg_right++;
      neg_total++;
    }

    int test_right = 0;
    int test_total = 0;
    for (vector< vector<float> >::iterator i = test_data_.begin();
         i != test_data_.end();
         i++)
    {
      for (int k = 0; k < feat_count_; k++)
        tmp_mat->data.fl[k] = (float)((*i)[k]);
      if (forest.predict(tmp_mat) > 0)
        test_right++;
      test_total++;
    }

    printf(" Pos train set: %d/%d %g\n", pos_right, pos_total, (float)(pos_right) / pos_total);
    printf(" Neg train set: %d/%d %g\n", neg_right, neg_total, (float)(neg_right) / neg_total);
    printf(" Test set:      %d/%d %g\n", test_right, test_total, (float)(test_right) / test_total);

    cvReleaseMat(&tmp_mat);

  }
Example #4
void Model::Predict_rtrees( const SampleSet& samples, SampleSet& outError )
{	
	int true_resp = 0;
	CvRTrees *model = (CvRTrees*)m_pModel;
	
	for (int i = 0; i < samples.N(); i++)
	{
		float ret = model->predict(samples.GetSampleAt(i), cv::Mat());
		if (ret != samples.GetLabelAt(i))
		{
			outError.Add(samples.GetSampleAt(i), samples.GetLabelAt(i));
		}
		else
		{
			true_resp++;
		}
	}
	printf("%d %d",samples.N(), true_resp);
}
Example #5
File: learning.cpp Project: PR2/pr2_plugs
CvRTrees* train_rf(CvMat* predictors, CvMat* labels)
{
	int stat[2];
	get_stat(labels, stat);
	printf("%d negative samples, %d positive samples\n", stat[0], stat[1]);
	
	const int tree_count = 500;
	const float priors[] = {0.25f,0.75f};
	CvRTrees* rtrees = new CvRTrees();
	CvRTParams rtparams = CvRTParams(5, 10, 0, false, 2, priors, true, 
									 (int)sqrt((float)predictors->cols), tree_count, 1e-6, 
									 CV_TERMCRIT_ITER + CV_TERMCRIT_EPS);
	CvMat* var_type = cvCreateMat(predictors->cols + 1, 1, CV_8UC1);
	for(int i = 0; i < predictors->cols; i++)
	{
		*(int*)(var_type->data.ptr + i*var_type->step) = CV_VAR_NUMERICAL;
	}
	*(int*)(var_type->data.ptr + predictors->cols*var_type->step) = CV_VAR_CATEGORICAL;
	rtrees->train(predictors, CV_ROW_SAMPLE, labels, 0, 0, var_type, 0, rtparams);
	return rtrees;
}
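
train_rf() also calls a get_stat() helper that is not shown. A minimal sketch, assuming labels holds one 0/1 class label per row:

void get_stat(CvMat* labels, int* stat)
{
	stat[0] = stat[1] = 0;
	for(int i = 0; i < labels->rows; i++)
	{
		// count negatives in stat[0] and positives in stat[1]
		if((int)cvGetReal1D(labels, i) == 0)
			stat[0]++;
		else
			stat[1]++;
	}
}
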
Example #6
/* Examines the values at each leaf node in order to see what the distribution
   of the data we put in looks like */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
	ASSERT_NUM_RHS_ARGS_EQUALS(1);
	
	const mxArray* forest_ptr = prhs[0];
	ASSERT_IS_POINTER(forest_ptr);
	CvRTrees *forest = (CvRTrees *) unpack_pointer(forest_ptr);
	
	// We are going to return a cell array with one cell per tree, so need this number
	int num_trees = forest->get_tree_count();
	mexPrintf("Loaded forest of %d trees, retrieving leave node values.\n", num_trees);

	mxArray *output_cell_array = mxCreateCellMatrix(1, num_trees);
	ASSERT_NON_NULL(output_cell_array);
	
	for (int t = 0; t < num_trees; t++) {
		mxArray* tree_struct = make_matlab_tree_struct(forest->get_tree(t));
		ASSERT_NON_NULL(tree_struct);
		mxSetCell(output_cell_array, t, tree_struct);
	}
	plhs[0] = output_cell_array;
}
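
The MEX wrappers in this listing hand the forest back and forth between MATLAB calls via pack_pointer()/unpack_pointer(), which are not shown. A minimal sketch that round-trips the raw pointer through a 1x1 uint64 array (an assumption about how the bridge is implemented):

#include <stdint.h>

mxArray* pack_pointer(void* ptr)
{
	// store the raw pointer value in a 1x1 uint64 array for MATLAB
	mxArray* out = mxCreateNumericMatrix(1, 1, mxUINT64_CLASS, mxREAL);
	*(uint64_t*)mxGetData(out) = (uint64_t)ptr;
	return out;
}

void* unpack_pointer(const mxArray* arr)
{
	// recover the pointer stored by pack_pointer()
	return (void*)*(const uint64_t*)mxGetData(arr);
}
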
Example #7
  void train()
  {
    int sample_size = pos_data_.size() + neg_data_.size();
    feat_count_ = pos_data_[0].size();

    CvMat* cv_data = cvCreateMat(sample_size, feat_count_, CV_32FC1);
    CvMat* cv_resp = cvCreateMat(sample_size, 1, CV_32S);

    // Put positive data in opencv format.
    int j = 0;
    for (vector< vector<float> >::iterator i = pos_data_.begin();
         i != pos_data_.end();
         i++)
    {
      float* data_row = (float*)(cv_data->data.ptr + cv_data->step * j);
      for (int k = 0; k < feat_count_; k++)
        data_row[k] = (*i)[k];

      cv_resp->data.i[j] = 1;
      j++;
    }

    // Put negative data in opencv format.
    for (vector< vector<float> >::iterator i = neg_data_.begin();
         i != neg_data_.end();
         i++)
    {
      float* data_row = (float*)(cv_data->data.ptr + cv_data->step * j);
      for (int k = 0; k < feat_count_; k++)
        data_row[k] = (*i)[k];

      cv_resp->data.i[j] = -1;
      j++;
    }

    CvMat* var_type = cvCreateMat(1, feat_count_ + 1, CV_8U);
    cvSet(var_type, cvScalarAll(CV_VAR_ORDERED));
    cvSetReal1D(var_type, feat_count_, CV_VAR_CATEGORICAL);

    float priors[] = {1.0, 1.0};

    CvRTParams fparam(8, 20, 0, false, 10, priors, false, 5, 50, 0.001f, CV_TERMCRIT_ITER);
    fparam.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 0.1);

    forest.train(cv_data, CV_ROW_SAMPLE, cv_resp, 0, 0, var_type, 0,
                 fparam);


    cvReleaseMat(&cv_data);
    cvReleaseMat(&cv_resp);
    cvReleaseMat(&var_type);
  }
Example #8
void normalValidation( DataSet& data, TrainResult& result)
{
	// these variables are not needed - pass empty Mats
	Mat varIdx, missingDataMask;

	Mat sampleIdx;

	result.train_hr = 0;
	result.test_hr = 0;
	result.fpRate = 0;
	result.fnRate = 0;
	
	//	printf( "numSamples %d", data.numSamples);
	
	//CvBoostTree boost;
	
	//define test and training sets
	float partTrain = 1.0/8.0;
	sampleIdx = Mat(1,data.numSamples,CV_8U,1.0);
	
	int negIdx = (int)floor(partTrain*data.numNeg);
	sampleIdx.colRange(negIdx*5, negIdx*6) = 0.0;
	
	
	int posIdx = (int)floor( partTrain*data.numPos );
	sampleIdx.colRange( data.numNeg+posIdx*5, data.numNeg + posIdx*6) = 0.0;
	
	//int numT = (cv::sum( sampleIdx ))[0];
	//printf("sample Idx sum (trainsamples): %d\n",numT);
	
	int numTestSamples = negIdx + posIdx;
	printf("numSamples: %d -- numTrainSamples: %d -- numTestSamples: %d\n",data.numSamples, data.numSamples-numTestSamples, numTestSamples );
	
	
	//training
	forest.train(data.data, CV_ROW_SAMPLE, data.responses, varIdx, sampleIdx, data.varType, missingDataMask, forestParams);
	
	//booster.train(data.data, CV_ROW_SAMPLE, data.responses, varIdx, sampleIdx, data.varType, missingDataMask, boostParams);
	
	//evaluation
	evaluation(forest, data, sampleIdx, result);
	
	
	double sum = (cv::sum(result.var_importance))[0];
	result.var_importance /= sum;
	
	printf( "____\nRecognition rate: train = %.2f%%, test = %.2f%% -- overall FN = %.2f%%, FP = %.2f%%\n",
		   result.train_hr*100., result.test_hr*100. ,result.fnRate*100. ,result.fpRate*100.);
}
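
normalValidation() trains a file-scope forest with a file-scope forestParams object, and neither definition is shown. A minimal sketch of plausible definitions (the parameter values are placeholders, not the original settings):

CvRTrees forest;
CvRTParams forestParams( 10,    // max_depth
                         10,    // min_sample_count
                         0,     // regression_accuracy
                         false, // use_surrogates
                         15,    // max_categories
                         0,     // priors
                         true,  // calc_var_importance (evaluation() reads it)
                         4,     // nactive_vars
                         100,   // max number of trees
                         0.01f, // forest accuracy
                         CV_TERMCRIT_ITER ); // termination criteria
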
Example #9
void find_decision_boundary_RF()
{
    img.copyTo( imgDst );

    Mat trainSamples, trainClasses;
    prepare_train_data( trainSamples, trainClasses );

    // learn classifier
    CvRTrees  rtrees;
    CvRTParams  params( 4, // max_depth,
                        2, // min_sample_count,
                        0.f, // regression_accuracy,
                        false, // use_surrogates,
                        16, // max_categories,
                        0, // priors,
                        false, // calc_var_importance,
                        1, // nactive_vars,
                        5, // max_num_of_trees_in_the_forest,
                        0, // forest_accuracy,
                        CV_TERMCRIT_ITER // termcrit_type
                       );

    rtrees.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), Mat(), Mat(), params );

    Mat testSample(1, 2, CV_32FC1 );
    for( int y = 0; y < img.rows; y += testStep )
    {
        for( int x = 0; x < img.cols; x += testStep )
        {
            testSample.at<float>(0) = (float)x;
            testSample.at<float>(1) = (float)y;

            int response = (int)rtrees.predict( testSample );
            circle( imgDst, Point(x,y), 2, classColors[response], 1 );
        }
    }
}
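
prepare_train_data() is not shown; in OpenCV's points_classifier sample, which this function appears to come from, it packs collected sample points and their class markers into training matrices. A sketch under that assumption (trainedPoints and trainedPointsMarkers are assumed globals):

vector<Point> trainedPoints;        // assumed: collected sample points
vector<int>   trainedPointsMarkers; // assumed: class label per point

static void prepare_train_data( Mat& samples, Mat& classes )
{
    // one row per point: (x, y) as float features
    Mat( trainedPoints ).reshape( 1, (int)trainedPoints.size() ).convertTo( samples, CV_32F );
    Mat( trainedPointsMarkers ).copyTo( classes );
}
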
Example #10
static
int build_rtrees_classifier( char* data_filename,
    char* filename_to_save, char* filename_to_load )
{
    CvMat* data = 0;
    CvMat* responses = 0;
    CvMat* var_type = 0;
    CvMat* sample_idx = 0;

    int ok = read_num_class_data( data_filename, 16, &data, &responses );
    int nsamples_all = 0, ntrain_samples = 0;
    int i = 0;
    double train_hr = 0, test_hr = 0;
    CvRTrees forest;
    CvMat* var_importance = 0;

    if( !ok )
    {
        printf( "Could not read the database %s\n", data_filename );
        return -1;
    }

    printf( "The database %s is loaded.\n", data_filename );
    nsamples_all = data->rows;
    ntrain_samples = (int)(nsamples_all*0.8);

    // Create or load Random Trees classifier
    if( filename_to_load )
    {
        // load classifier from the specified file
        forest.load( filename_to_load );
        ntrain_samples = 0;
        if( forest.get_tree_count() == 0 )
        {
            printf( "Could not read the classifier %s\n", filename_to_load );
            return -1;
        }
        printf( "The classifier %s is loaded.\n", data_filename );
    }
    else
    {
        // create classifier by using <data> and <responses>
        printf( "Training the classifier ...\n");

        // 1. create type mask
        var_type = cvCreateMat( data->cols + 1, 1, CV_8U );
        cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) );
        cvSetReal1D( var_type, data->cols, CV_VAR_CATEGORICAL );

        // 2. create sample_idx
        sample_idx = cvCreateMat( 1, nsamples_all, CV_8UC1 );
        {
            CvMat mat;
            cvGetCols( sample_idx, &mat, 0, ntrain_samples );
            cvSet( &mat, cvRealScalar(1) );

            cvGetCols( sample_idx, &mat, ntrain_samples, nsamples_all );
            cvSetZero( &mat );
        }

        // 3. train classifier
        forest.train( data, CV_ROW_SAMPLE, responses, 0, sample_idx, var_type, 0,
            CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER));
        printf( "\n");
    }

    // compute prediction error on train and test data
    for( i = 0; i < nsamples_all; i++ )
    {
        double r;
        CvMat sample;
        cvGetRow( data, &sample, i );

        r = forest.predict( &sample );
        r = fabs((double)r - responses->data.fl[i]) <= FLT_EPSILON ? 1 : 0;

        if( i < ntrain_samples )
            train_hr += r;
        else
            test_hr += r;
    }

    test_hr /= (double)(nsamples_all-ntrain_samples);
    train_hr /= (double)ntrain_samples;
    printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n",
            train_hr*100., test_hr*100. );

    printf( "Number of trees: %d\n", forest.get_tree_count() );

    // Print variable importance
    var_importance = (CvMat*)forest.get_var_importance();
    if( var_importance )
    {
        double rt_imp_sum = cvSum( var_importance ).val[0];
        printf("var#\timportance (in %%):\n");
        for( i = 0; i < var_importance->cols; i++ )
            printf( "%-2d\t%-4.1f\n", i,
            100.f*var_importance->data.fl[i]/rt_imp_sum);
    }

    // Print some proximities
    printf( "Proximities between some samples corresponding to the letter 'T':\n" );
    {
        CvMat sample1, sample2;
        const int pairs[][2] = {{0,103}, {0,106}, {106,103}, {-1,-1}};

        for( i = 0; pairs[i][0] >= 0; i++ )
        {
            cvGetRow( data, &sample1, pairs[i][0] );
            cvGetRow( data, &sample2, pairs[i][1] );
            printf( "proximity(%d,%d) = %.1f%%\n", pairs[i][0], pairs[i][1],
                forest.get_proximity( &sample1, &sample2 )*100. );
        }
    }

    // Save Random Trees classifier to file if needed
    if( filename_to_save )
        forest.save( filename_to_save );

    cvReleaseMat( &sample_idx );
    cvReleaseMat( &var_type );
    cvReleaseMat( &data );
    cvReleaseMat( &responses );

    return 0;
}
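
A hypothetical driver for build_rtrees_classifier(); the file names below are placeholders, patterned after OpenCV's letter_recog.cpp sample from which this function derives:

int main()
{
    // train on a CSV database and save the resulting forest
    return build_rtrees_classifier( (char*)"letter-recognition.data",
                                    (char*)"rtrees.xml",
                                    /*filename_to_load=*/0 );
}
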
Example #11
int main()
{
    const int train_sample_count = 300;
    bool is_regression = false;

    const char* filename = "data/waveform.data";
    int response_idx = 21;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    if(data.read_csv(filename) != 0)
    {
        printf("couldn't read %s\n", filename);
        exit(1);
    }

    data.set_response_idx(response_idx);
    data.change_var_type(response_idx, CV_VAR_CATEGORICAL);
    data.set_train_test_split( &spl );

    const CvMat* values = data.get_values();
    const CvMat* response = data.get_responses();
    const CvMat* missing = data.get_missing();
    const CvMat* var_types = data.get_var_types();
    const CvMat* train_sidx = data.get_train_sample_idx();
    const CvMat* var_idx = data.get_var_idx();
    CvMat* response_map;
    CvMat* ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL);
    int num_classes = response_map->cols;
    
    CvDTree dtree;
    printf("======DTREE=====\n");
    CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0);
    dtree.train( &data, cvd_params);
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );

#if 0
    /* boosted trees are only implemented for two classes */
    printf("======BOOST=====\n");
    CvBoost boost;
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 );
#endif

    printf("======RTREES=====\n");
    CvRTrees rtrees;
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    CvERTrees ertrees;
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );

    printf("======GBTREES=====\n");
    CvGBTrees gbtrees;
    CvGBTreesParams gbparams;
    gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression
    gbtrees.train( &data, gbparams);
    
    //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx);
    print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0);

    printf("======KNEAREST=====\n");
    CvKNearest knearest;
    //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
    //                const Mat& _sample_idx, bool _is_regression,
    //                int _max_k, bool _update_base )
    bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
    assert(is_classifier);
    int max_k = 10;
    knearest.train(values, response, train_sidx, is_regression, max_k, false);

    CvMat* new_response = cvCreateMat(response->rows, 1, values->type);
    //print_types();

    //const CvMat* train_sidx = data.get_train_sample_idx();
    knearest.find_nearest(values, max_k, new_response, 0, 0, 0);

    print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR),
                 knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0);

    printf("======== RBF SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm1;
    CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm1.train(values, response, train_sidx, var_idx, params1);
    svm1.train_auto(values, response, var_idx, train_sidx, params1);
    svm_print_error(&svm1, values, response, response_idx, train_sidx);

    printf("======== Linear SVM =======\n");
    CvMySVM svm2;
    CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm2.train(values, response, train_sidx, var_idx, params2);
    svm2.train_auto(values, response, var_idx, train_sidx, params2);
    svm_print_error(&svm2, values, response, response_idx, train_sidx);

    printf("======NEURONAL NETWORK=====\n");

    int num_layers = 3;
    CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(num_layers, sizeof(int)));
    cvmSetI(&layers, 0, 0, values->cols-1);
    cvmSetI(&layers, 0, 1, num_classes);
    cvmSetI(&layers, 0, 2, num_classes);
    CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0);
    CvANN_MLP_TrainParams ann_params;
    //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP;
    CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes);

    CvMat values2 = cvmat_remove_column(values, response_idx);
    ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000);
    //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000);

    ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx);

#if 0 /* slow */

    printf("======== Polygonal SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm3;
    CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY,
                                     /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm3.train(values, response, train_sidx, var_idx, params3);
    svm3.train_auto(values, response, var_idx, train_sidx, params3);
    svm_print_error(&svm3, values, response, response_idx, train_sidx);
#endif

    return 0;
}
Example #12
int RandomTrees::train(const char* samples_filename, const char* model_filename, const double ratio, double &train_error, double &test_error)
{
	CvMat* data = 0;
	CvMat* responses = 0;
	CvMat* var_type = 0;
	CvMat* sample_idx = 0;

	this->tree_parameters_.nactive_vars = (int)sqrt(this->number_of_features_);

	int ok = read_num_class_data( samples_filename, this->number_of_features_, &data, &responses );
	int nsamples_all = 0, ntrain_samples = 0;
	int i = 0;
	double train_hr = 0, test_hr = 0;
	CvRTrees forest;
	CvMat* var_importance = 0;

	if( !ok )
	{
		cout << "Could not read the sample in" << samples_filename << endl;;
		return -1;
	}

	cout << "The sample file " << samples_filename << " is loaded." << endl;
	nsamples_all = data->rows;
	ntrain_samples = (int)(nsamples_all * ratio);


	// create classifier by using <data> and <responses>
	cout << "Training the classifier ..." << endl;

	// 1. create type mask
	var_type = cvCreateMat( data->cols + 1, 1, CV_8U );
	cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) );
	cvSetReal1D( var_type, data->cols, CV_VAR_CATEGORICAL );

	// 2. create sample_idx
	sample_idx = cvCreateMat( 1, nsamples_all, CV_8UC1 );
	{
		CvMat mat;
		cvGetCols( sample_idx, &mat, 0, ntrain_samples );
		cvSet( &mat, cvRealScalar(1) );

		cvGetCols( sample_idx, &mat, ntrain_samples, nsamples_all );
		cvSetZero( &mat );
	}

	// 3. train classifier
	forest.train( data, CV_ROW_SAMPLE, responses, 0, sample_idx, var_type, 0, this->tree_parameters_);
	cout << endl;


	// compute prediction error on train and test data
	for( i = 0; i < nsamples_all; i++ )
	{
		double r;
		CvMat sample;
		cvGetRow( data, &sample, i );

		r = forest.predict( &sample );
		r = fabs((double)r - responses->data.fl[i]) <= FLT_EPSILON ? 1 : 0;

		if( i < ntrain_samples )
			train_hr += r;
		else
			test_hr += r;
	}

	test_hr /= (double)(nsamples_all-ntrain_samples);
	train_hr /= (double)ntrain_samples;

	train_error = 1 - train_hr;
	test_error = 1 - test_hr;

	cout << "Recognition rate: train = " << train_hr*100 << ", test = " << test_hr*100 << endl;
	cout << "Number of trees: " << forest.get_tree_count() << endl;

	// Print variable importance
	var_importance = (CvMat*)forest.get_var_importance();
	if( var_importance )
	{
		double rt_imp_sum = cvSum( var_importance ).val[0];
		printf("var#\timportance (in %%):\n");
		for( i = 0; i < var_importance->cols; i++ )
			printf( "%-2d\t%-4.1f\n", i,100.f*var_importance->data.fl[i]/rt_imp_sum);
	}

	// Save Random Trees classifier to file if needed
	if( model_filename )
		forest.save( model_filename );

	//cvReleaseMat( &var_importance );		//causes a segmentation fault
	cvReleaseMat( &sample_idx );
	cvReleaseMat( &var_type );
	cvReleaseMat( &data );
	cvReleaseMat( &responses );

	return 0;
}
Example #13
File: test_rgbd.cpp Project: far-ad/GP-RF
int main(int argc, char** argv)
{
  // std::cout<<FLT_EPSILON<<std::endl; 
  cv::Mat training_data, training_labels,testing_data, testing_labels;
  
  training_data = read_rgbd_data_cv(argv[1],NUMBER_OF_TRAINING_SAMPLES);
  training_labels = read_rgbd_data_cv(argv[2], NUMBER_OF_TRAINING_SAMPLES);
  testing_data = read_rgbd_data_cv(argv[3],NUMBER_OF_TESTING_SAMPLES);
  testing_labels = read_rgbd_data_cv(argv[4], NUMBER_OF_TESTING_SAMPLES);
  
 
  printf("dataset specs: %d samples with %d features\n", training_data.rows, training_data.cols);

  // define all the attributes as numerical
  // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL)
  // that can be assigned on a per attribute basis

  cv::Mat var_type = cv::Mat(training_data.cols + 1, 1, CV_8U );
  var_type.setTo(cv::Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical
  var_type.at<uchar>(training_data.cols, 0) = CV_VAR_CATEGORICAL; // the labels are categorical

  /******************************** Step 1: define and initialize the Random Trees parameters ******************************/
  float priors[] = {1,1,1,1,1};  // weights of each class
  CvRTParams params = CvRTParams(25, // max depth
				 50, // min sample count
				 0, // regression accuracy: N/A here
				 false, // compute surrogate split, no missing data
				 15, // max number of categories (use sub-optimal algorithm for larger numbers)
				 priors, // the array of priors
				 false,  // calculate variable importance
				 20,       // number of variables randomly selected at node and used to find the best split(s).
				 NUMBER_OF_TREES,	 // max number of trees in the forest
				 0.01f,				// forest accuracy
				 CV_TERMCRIT_ITER |	CV_TERMCRIT_EPS // termination criteria
				 );
  
  /**************************** Step 2: train the Random Decision Forest (RDF) classifier *********************/
  // printf( "\nUsing training database: %s\n\n", argv[1]);
  CvRTrees* rtree = new CvRTrees;
  rtree->train(training_data, CV_ROW_SAMPLE, training_labels,
	       cv::Mat(), cv::Mat(), var_type, cv::Mat(), params);
  
  // perform classifier testing and report results
  cv::Mat test_sample, train_sample;
  int correct_class = 0;
  int wrong_class = 0;
  int result;
  int label;
  int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0};
  int false_negatives [NUMBER_OF_CLASSES] = {0,0,0,0,0};

  std::vector<CvDTreeNode*> leaf_nodes(training_data.rows);  // std::vector instead of a non-standard variable-length array

  for (int tsample = 0; tsample < training_data.rows; tsample++)
    {
      train_sample = training_data.row(tsample);
      CvForestTree* tree = rtree->get_tree(1);
      CvDTreeNode* leaf_node = tree->predict(train_sample, cv::Mat());
      leaf_nodes[tsample] = leaf_node; 
    }

  // printf( "\nUsing testing database: %s\n\n", argv[2]);

  for (int tsample = 0; tsample < testing_data.rows; tsample++)
    {	       
      // extract a row from the testing matrix
      test_sample = testing_data.row(tsample);
      // train on the testing data:
      // test_sample = training_data.row(tsample);
      /******************************** Step 3: predict *********************************************/

      result = (int) rtree->predict(test_sample, cv::Mat());
      label = (int) testing_labels.at<float>(tsample, 0);

      printf("Testing Sample %i -> class result (digit %d) - label (digit %d)\n", tsample, result, label);

      // get the leaf nodes of the first tree in the forest
      /*CvForestTree* tree = rtree->get_tree(0);
      std::list<const CvDTreeNode*> leaf_list;
      leaf_list = get_leaf_node( tree );
      printf("Number of Leaf nodes: %ld\n", leaf_list.size());*/

      // if the prediction and the (true) testing classification are the same
      // (N.B. openCV uses a floating point decision tree implementation!)
      if (fabs(result - label)
	  >= FLT_EPSILON)
	{
	  // if they differ more than floating point error => wrong class
	  wrong_class++;
	  false_positives[(int) result]++;
	  false_negatives[(int) testing_labels.at<float>(tsample, 0)]++;
	}
      else
	{
	  // otherwise correct
	  correct_class++;
	}
    }

  printf( // "\nResults on the testing database: %s\n"
	 "\tCorrect classification: %d (%g%%)\n"
	 "\tWrong classifications: %d (%g%%)\n",
	 // argv[2],
	 correct_class, (double) correct_class*100/testing_data.rows,
	 wrong_class, (double) wrong_class*100/testing_data.rows);

  for (int i = 0; i < NUMBER_OF_CLASSES; i++)
    {
      printf( "\tClass (digit %d) false postives 	%d (%g%%)\n\t                false negatives  %d (%g%%)\n", i,
	      false_positives[i],
	      (double) false_positives[i]*100/testing_data.rows,
	      false_negatives[i],
	      (double) false_negatives[i]*100/testing_data.rows);
    }

	// get all the leaf nodes in the forest
   for (int i = 0; i < NUMBER_OF_TREES; i ++)
	{ 
      	CvForestTree* tree = rtree->get_tree(i);
      	std::list<const CvDTreeNode*> leaf_list;
      	leaf_list = get_leaf_node( tree );
	}
  	//get training_sample indices for leaf nodes
  std::list<leaf_samples> node_indices;
  for (int i = 0; i < training_data.rows; i++) 
    {
      CvDTreeNode* leaf_node = leaf_nodes[i];

      if (leaf_node != NULL) 
	  {
		leaf_samples leaf_sample;
		leaf_sample.leaf = leaf_node;
		leaf_sample.indices.push_front(i);
		printf("\nValue of leaf: %f\n", leaf_node->value);
		printf("Smaple indices for leaf:\n");
		printf(" %d", i);

		for (int j=i+1; j < training_data.rows; j++) 
	  	{
	    	if (leaf_node == leaf_nodes[j])
			{
	      		leaf_sample.indices.push_front(j);
	      		printf(" %lu", j);
	      		leaf_nodes[j] = NULL;
	    	}
	  	}
		node_indices.push_front(leaf_sample);      
      }
    }
  	printf("\nSize of node_indices: %d\n", node_indices.size()); 
	//get labels and features
	

  //get double pointers for features and labels
  const double* p = testing_data.ptr<double>(0);
  std::vector<double> vec(p, p + testing_data.cols);

  
  // all matrix memory free by destructors

  // all OK : main returns 0
  // result = rtree->predict(testing_data.row(79), cv::Mat());
  // float andi = result - testing_labels.at<float>(79, 0);
  // // std::cout<<training_labels.row(0).col(0)<<std::endl;
  // std::cout<<andi<<std::endl;
  return 0;
}
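
This example uses a leaf_samples struct and a get_leaf_node() helper that are not shown. A minimal sketch consistent with how they are used above (the struct layout and traversal details are assumptions):

struct leaf_samples
{
	CvDTreeNode* leaf;       // leaf node reached
	std::list<int> indices;  // training-sample indices that land on it
};

static void collect_leaves( const CvDTreeNode* node,
                            std::list<const CvDTreeNode*>& leaves )
{
	if( !node )
		return;
	if( !node->left && !node->right )  // no children -> leaf
		leaves.push_back( node );
	collect_leaves( node->left, leaves );
	collect_leaves( node->right, leaves );
}

std::list<const CvDTreeNode*> get_leaf_node( CvForestTree* tree )
{
	// walk the tree from the root and collect every leaf node
	std::list<const CvDTreeNode*> leaves;
	collect_leaves( tree->get_root(), leaves );
	return leaves;
}
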
Example #14
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    ASSERT_NUM_RHS_ARGS_GTE(2);
    ASSERT_NUM_LHS_ARGS_LT(3);

    const mxArray* dataMtx = prhs[0];
    const mxArray* targetValueVec = prhs[1];
    
    //see if we have been provided a struct containing options for the training. 
    //if not, then use defaults provided by opencv
    CvRTParams* rtParams;
    if (nrhs > 2) {
        mexPrintf("Parsing struct argument for parameters\n");
        rtParams = parse_struct_to_forest_config(prhs[2]);
    }
    else {
        mexPrintf("Using default parameters\n");
        rtParams = parse_struct_to_forest_config(NULL);
    }

    mexPrintf("Parameters:\n");
    print_forest_params(rtParams);
    
    unsigned int numSamples, numVariables;

    CvMat* dataCvMtx = matlab_matrix_to_opencv_matrix(dataMtx);
    numSamples = dataCvMtx->rows;
    numVariables = dataCvMtx->cols;
    mexPrintf("training data converted to opencv format. %d samples, each with %d variables\n",
              numSamples, numVariables);
#ifdef PRINT_INPUTS
    print_opencv_matrix(dataCvMtx);
#endif

    CvMat* targetCvMtx = matlab_array_to_opencv_array(targetValueVec);
    if (targetCvMtx->rows != numSamples) {
		MEX_ERR_PRINTF("training data had %d samples, labels contain %d values.", 
		               numSamples, targetCvMtx->rows);
    }
    mexPrintf("training labels converted to opencv format.\n");
#ifdef PRINT_INPUTS
    print_opencv_matrix(targetCvMtx);
#endif

    //specify the type of our variables. In this case, all our variables are ordered (numerical).
    CvMat* var_type = cvCreateMat(dataCvMtx->cols + 1, 1, CV_8U);
    cvSet(var_type, cvScalarAll(CV_VAR_ORDERED));

    //actually make the forest and do the training
    clock_t start_time, end_time;
    mexPrintf("training now...");
    start_time = clock();
    CvRTrees *forest = new CvRTrees;
    forest->train(dataCvMtx, CV_ROW_SAMPLE, targetCvMtx, NULL, NULL, var_type, NULL, *rtParams);
    end_time = clock();
	clock_t diff_time = end_time - start_time;
	double seconds_passed = ((float)diff_time) / CLOCKS_PER_SEC;
    mexPrintf("training done in %fs\n", seconds_passed);

    //pack the pointer and return it to matlab
    plhs[0] = pack_pointer((void *)forest);

	// If the user supplied a second lhs argument, return them the time taken to train
	if (nlhs > 1) {
		plhs[1] = mxCreateDoubleScalar(seconds_passed);
	}
    
    cvReleaseMat(&var_type);
    cvReleaseMat(&dataCvMtx);
    cvReleaseMat(&targetCvMtx);
} 
Example #15
 void save(char* file)
 {
   forest.save(file);
 }
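
The matching loader is equally short, since CvRTrees inherits load() from CvStatModel; a sketch assuming the same forest member:

 void load(char* file)
 {
   forest.load(file);
 }
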
Example #16
void evaluation(CvRTrees& forest, DataSet& data, Mat& sampleIdx, TrainResult& result)
{

	int numTrainSamples = (int)(cv::sum( sampleIdx ))[0];

	// retrieve variable_importance
	result.var_importance = forest.get_var_importance();
//	result.var_importance = forest.get_subtree_weights();
//	cout << result.var_importance << endl;

	double min,max;
	Point minLoc,maxLoc;

	minMaxLoc(result.var_importance,&min,&max,&minLoc,&maxLoc);
//	printf("variable importance (max:%.2f%%):\n\n",max*100.f);

	// compute prediction error on train and test data
	result.train_hr = 0; result.test_hr = 0; result.fpRate = 0; result.fnRate = 0;


	Mat responses_new = Mat(data.numSamples,1,CV_32F,9.0);

	for(int i = 0; i < data.numSamples; i++ )
	{
		double r;
		Mat sample = data.data.row(i);

		// do prediction with trained forest
		r = forest.predict(sample);
		responses_new.at<float>(i,0) = r;
		float respo = data.responses.at<float>(i,0);

		// prediction correct ?
		r = fabs(r - respo) <= FLT_EPSILON ? 1 : 0;


		if( sampleIdx.at<char>(0,i) )
			result.train_hr += r;
		else
			result.test_hr += r;

		// false prediction, increase appropriate counter
		if(!r)
		{
			if(respo)
				result.fnRate += 1;
			else
				result.fpRate += 1;
		}
	}
//	cout << sampleIdx << endl;
//	cout << data.responses << endl;
//	cout << responses_new << endl;

	result.test_hr /= (double)(data.numSamples-numTrainSamples);
	result.train_hr /= (double)numTrainSamples;

	result.fpRate /= (double) data.numNeg;
	result.fnRate /= (double) data.numPos;

}
Example #17
/** @function main */
int main( int argc, char** argv )
{
	char selection;
	cout<<"Welcome to Plant Recognition System"<<endl;
	cout<<"Please select following in order to make an operation:"<<endl;
	cout<<"S for Segmentation and Feature Extraction of Normal Leaf Image"<<endl;
	cout<<"F for Feature Extraction of a Binary Image"<<endl;
	cout<<"C for Classification of Test Set with NN"<<endl;
	cout<<"T for Feature Extraction and Training of Train Set"<<endl;
	cout<<"Q for Extracted Features to CSV File"<<endl;
	cout<<"R for Classification with Random Forests"<<endl;
	cout<<"E for SIFT "<<endl;
	cout<<"Y for leaf detection test with SIFT+BoF+SVM"<<endl;
	cin>>selection;


	switch(selection)
	{

		case 'e':
		case 'E':
			{
				
				/*
				IplImage* input=cvLoadImage("C:/fb2.jpg", CV_LOAD_IMAGE_GRAYSCALE);
				vector<KeyPoint> keypoints;
				OutputArray descriptors;
				InputArray mask;*/

				/*
				Mat input=imread("C:/fb2.jpg", CV_LOAD_IMAGE_GRAYSCALE);
				
				if( !input.data  )
				{
					cout<<"Error while loading data"<<endl;
					return -1;
				}
				
				int minHessian = 400;
				SurfFeatureDetector detector( minHessian );
				
				std::vector<KeyPoint> keypoints_1;
				detector.detect( input, keypoints_1 );
				
				Mat img_keypoints_1; 
				drawKeypoints( input, keypoints_1, img_keypoints_1, Scalar::all(-1), DrawMatchesFlags::DEFAULT );
				imshow("Keypoints 1", img_keypoints_1 );*/
				
				/*
				const cv::Mat input = cv::imread("C:/MyPic.png", 0); //Load as grayscale

				cv::SiftFeatureDetector detector;
				std::vector<cv::KeyPoint> keypoints;
				detector.detect(input, keypoints);
				std::vector<cv::Point2f> points;
				std::vector<cv::KeyPoint>::iterator it;
				*/

				//cv::Mat pointMatrix(points);

				// Add results to image and save.
				/*cv::Mat output;
				cv::drawKeypoints(input, keypoints, output);
				cv::imwrite("C:/sift_result.jpg", output);*/

				//waitKey(0);
				//return 0;


			}
			break;

	//// THE TRAIN AND TEST FEATURES WERE STORED AS CSV FILES (FOR RANDOM FOREST)
		case 'q':
		case 'Q':
			{
				ConvertToCSV* myCsv=new ConvertToCSV();
				myCsv->TrainFeaturesAsCSV();
				myCsv->TestFeaturesAsCSV();
			}
		break;


	////SEGMENTATION WITH GRABCUT THEN FEATURE EXTRACTION
		case 'S':
		case 's':
			{
				Segment *mySegment=new Segment();
				mySegment->makeSegmentation();

				IplImage* segmented=cvCloneImage(mySegment->getSegmentedImage());
				//cvShowImage("segmentedImage", segmented );

				IplImage* converted=cvCreateImage( cvGetSize( segmented ), 8, 3 );
				cvCvtColor(segmented, converted, CV_GRAY2RGB);
				cvShowImage("converted", converted );
				ExtractDescriptorHelper* myExtract=new ExtractDescriptorHelper();
				myExtract->ExtractDescriptors(converted);
			}
		break;
		
	////FEATURE EXTRACTION OF SINGLE IMAGE
		case 'F':
		case 'f':
			{
				ImageReaderHelper* tmpReader=new ImageReaderHelper();
				IplImage* src= tmpReader->readBinaryImage();
				ExtractDescriptorHelper* tmpDescriptorFinder=new ExtractDescriptorHelper();
				tmpDescriptorFinder->ExtractDescriptors(src);
				tmpDescriptorFinder->sortVector();
				tmpDescriptorFinder->createFeatureVector();
			}
		break;

	//// NORMALIZATION OF THE GEOMETRIC FEATURES
	/*
	NormaliseGeoFeatures *normaliseTmp=new NormaliseGeoFeatures();
	//normaliseTmp->produceFileNamesVect();
	normaliseTmp->initializeMaxMin();
	normaliseTmp->calcMaxMin();
	normaliseTmp->normalizeGeoFeatures();
	normaliseTmp->writeMaxMinToFile();
	*/
	
	////CLASSIFICATION WITH NEAREST NEIGHBOUR
		case 'C':
		case 'c':
			{
				int b;
				Classify *temp=new Classify();
				//temp->getMinMaxFromFile();
				b=temp->makeClassification();
				//temp->sortDataVect();
				//temp->printVector();
			}
		break;
		
	/*
	int d;
	ClassifyIndividual *myTemp=new ClassifyIndividual();
	myTemp->makeClassification();
	myTemp->sortDataVect();
	myTemp->printVector();
	*/
	
	
	//FEATURE EXTRACTION AND WRITING IT TO TXT
	
	// READING FROM FILE AND WRITING THE FEATURES TO A TXT FILE IS DONE AND WORKING.
	// IMPLEMENTED FOR THE FILES IN THE C:/Deneme/ DIRECTORY; THE FEATURES ARE BEING WRITTEN.
		case 'T':
		case 't':
			{
				int sayac=0;
				DIR *dir;
				struct dirent *ent;
				char folder[100];
				char writeFile[100];
				string write;
				string str1="C:/TrainSet/.";
				string str2="C:/TrainSet/..";
				string str3="C:/TrainSet/Thumbs.db";
				if ((dir = opendir ("C:/TrainSet/"))) 
					{
			
						while ((ent = readdir (dir)) != NULL) 
						{
								sprintf (folder, "C:/TrainSet/%s", ent->d_name);
								sprintf (writeFile, "C:/Features/%s.txt", ent->d_name);
								cout<<"File name:"<<folder<<endl;
					
								if(str1.compare(folder)==0 || str2.compare(folder)==0 || str3.compare(folder)==0)
									continue;
					
								IplImage* src;
								src=cvLoadImage(folder);
								ExtractDescriptorHelper* tmpDescriptorFinder=new ExtractDescriptorHelper();
								tmpDescriptorFinder->ExtractDescriptors(src);
					
								tmpDescriptorFinder->sortVector();
								tmpDescriptorFinder->createFeatureVector();
								vector<double> writeVector=tmpDescriptorFinder->getMyFeatureVector();
								double featureArray[featureVectSize];
					
								delete tmpDescriptorFinder;	// was free(); the object was allocated with new
								sayac++;

								for(int p=0; p<featureVectSize; p++)
									featureArray[p]=writeVector.at(p);

								ofstream myfile;
								myfile.open (writeFile);
								for(int k=0; k<featureVectSize; k++)
									myfile << featureArray[k]<<"\n";
								myfile.close();
					
					
						}
					closedir (dir);
					} 
		
					else {
			
						cout<<"Error exists"<<endl;
						perror ("");
						return EXIT_FAILURE;
						}

					cout<<"Sayac: "<<sayac<<endl;
			
			}
		break;

		case 'R':
		case 'r':
			{
				// let's just check the version first
	
				printf ("OpenCV version %s (%d.%d.%d)\n", CV_VERSION,
							CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION);

				// define training data storage matrices (one for attribute examples, one
				// for classifications)
				//CV_8UC(15) , CV_32FC1
				Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
				Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1);

				//define testing data storage matrices

				Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
				Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1);

				// define all the attributes as numerical
				// alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL)
				// that can be assigned on a per attribute basis

				Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U );
				var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical

				// this is a classification problem (i.e. predict a discrete number of class
				// outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL

				var_type.at<uchar>(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL;

				double result; // value returned from a prediction

				// load training and testing data sets

				if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) &&
						read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES))
				{
					// define the parameters for training the random forest (trees)

					float priors[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};  // weights of each class
					// (all equal as equal samples of each digit)

					CvRTParams params = CvRTParams(20, // max depth
												5, // min sample count
												0, // regression accuracy: N/A here
												false, // compute surrogate split, no missing data
												15, // max number of categories (use sub-optimal algorithm for larger numbers)
												priors, // the array of priors
												false,  // calculate variable importance
												40,       // number of variables randomly selected at node and used to find the best split(s).
												100,	 // max number of trees in the forest
												0.01f,				// forest accuracy
												CV_TERMCRIT_ITER |	CV_TERMCRIT_EPS // termination criteria
												);

					// train random forest classifier (using training data)

					printf( "\nUsing training database: %s\n\n", argv[1]);
					CvRTrees* rtree = new CvRTrees;

					rtree->train(training_data, CV_ROW_SAMPLE, training_classifications,
								 Mat(), Mat(), var_type, Mat(), params);

					// perform classifier testing and report results

					Mat test_sample;
					int correct_class = 0;
					int wrong_class = 0;
					int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

					printf( "\nUsing testing database: %s\n\n", argv[2]);

					for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++)
					{

						// extract a row from the testing matrix

						test_sample = testing_data.row(tsample);

						// run random forest prediction

						result = rtree->predict(test_sample, Mat());

						printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result);

						// if the prediction and the (true) testing classification are the same
						// (N.B. openCV uses a floating point decision tree implementation!)

						if (fabs(result - testing_classifications.at<float>(tsample, 0))
								>= FLT_EPSILON)
						{
							// if they differ more than floating point error => wrong class

							wrong_class++;

							false_positives[(int) result]++;

						}
						else
						{

							// otherwise correct

							correct_class++;
						}
					}

					printf( "\nResults on the testing database: %s\n"
							"\tCorrect classification: %d (%g%%)\n"
							"\tWrong classifications: %d (%g%%)\n",
							argv[2],
							correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES,
							wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES);

					for (int i = 0; i < NUMBER_OF_CLASSES; i++)
					{
						printf( "\tClass (digit %d) false postives 	%d (%g%%)\n", i,
								false_positives[i],
								(double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES);
					}


					// all matrix memory free by destructors


					// all OK : main returns 0

					return 0;
				}

				// not OK : main returns -1

				return -1;
			
			}
		break;

		case 'Y':
		case 'y':
			{
				isLeaf *myLeafTmp=new isLeaf();

				//myLeafTmp->produceDictionary();  *** herhangi 2sini veya 3unu aynı anda çalıştırma.
				myLeafTmp->produceTrainData();  // everything here.
				//myLeafTmp->produceTestData();
				//myLeafTmp->isLeafOrNot();  crashes
			}
			break;
	}
		
	waitKey(0);
	return 0;
};
Example #18
int main()
{
	//-----------------------------------读图片------------------------
	IplImage*** imgs = new IplImage**[END - START + 1];
	IplImage*** tex_imgs = new IplImage**[END - START + 1];
	for (int i = 0; i < END - START + 1; i++)
	{
		imgs[i] = new IplImage*[5];
		tex_imgs[i] = new IplImage*[4];
	}
	for (int i = 0; i < END - START + 1; i++)
	{
		for (int j = 0; j < 4; j++)
		{
			imgs[i][j] = NULL;
			tex_imgs[i][j] = NULL;
		}
		imgs[i][4] = NULL;
	}
	//----------------------------------------
	cout << "read image..........." << endl;
	for (int i = START; i <= END; i++)
	{
		char flairname[100], t1name[100], t1cname[100], t2name[100], truthname[100];
		memset(flairname, 0, 100); memset(t1name, 0, 100); memset(t1cname, 0, 100); memset(t2name, 0, 100); memset(truthname, 0, 100);

		sprintf(flairname, "BRATS_HG0005_FLAIR/BRATS_HG0005_FLAIR_%d.png", i);
		sprintf(t1name, "BRATS_HG0005_T1/BRATS_HG0005_T1_%d.png", i);
		sprintf(t1cname, "BRATS_HG0005_T1C/BRATS_HG0005_T1C_%d.png", i);
		sprintf(t2name, "BRATS_HG0005_T2/BRATS_HG0005_T2_%d.png", i);
		sprintf(truthname, "BRATS_HG0005_truth/BRATS_HG0005_truth_%d.png", i);

		IplImage* flair_img = RGB2GRAY(cvLoadImage(flairname));
		IplImage* t1_img = RGB2GRAY(cvLoadImage(t1name));
		IplImage* t1c_img = RGB2GRAY(cvLoadImage(t1cname));
		IplImage* t2_img = RGB2GRAY(cvLoadImage(t2name));
		IplImage* truth_img = RGB2GRAY(cvLoadImage(truthname));

		imgs[i - START][0] = flair_img;
		imgs[i - START][1] = t1_img;
		imgs[i - START][2] = t1c_img;
		imgs[i - START][3] = t2_img;
		imgs[i - START][4] = truth_img;
		// compute the texture images
		IplImage* flair_tex = cvCreateImage(cvGetSize(flair_img), IPL_DEPTH_8U, 1);
		IplImage* t1_tex = cvCreateImage(cvGetSize(t1_img), IPL_DEPTH_8U, 1);
		IplImage* t1c_tex = cvCreateImage(cvGetSize(t1c_img), IPL_DEPTH_8U, 1);
		IplImage* t2_tex = cvCreateImage(cvGetSize(t2_img), IPL_DEPTH_8U, 1);
		LBP(flair_img, flair_tex);
		LBP(t1_img, t1_tex);
		LBP(t1c_img, t1c_tex);
		LBP(t2_img, t2_tex);

		tex_imgs[i - START][0] = flair_tex;
		tex_imgs[i - START][1] = t1_tex;
		tex_imgs[i - START][2] = t1c_tex;
		tex_imgs[i - START][3] = t2_tex;
	}
	//----------------------------------------------------------
	cout << "read training data............" << endl;
	Mat train_datas(HEIGHT*WIDTH*(END - START + 1), ATTRIBUTES_PER_SAMPLE, CV_32FC1);    
	Mat responses(HEIGHT*WIDTH*(END - START + 1), 1, CV_32SC1);
	//---读取训练数据----
	int dataline=read_training_data(imgs,tex_imgs, train_datas, responses);
	Mat _train_datas(dataline, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
	Mat _responses(dataline, 1, CV_32SC1);
	// keep only the first dataline training rows
	for (int i = 0; i < dataline; i++)
	{
		float* float_data = train_datas.ptr<float>(i);
		int* int_data = responses.ptr<int>(i);

		_train_datas.at<float>(i, 0) = float_data[0];
		_train_datas.at<float>(i, 1) = float_data[1];
		_train_datas.at<float>(i, 2) = float_data[2];
		_train_datas.at<float>(i, 3) = float_data[3];

		_train_datas.at<float>(i, 4) = float_data[4];
		_train_datas.at<float>(i, 5) = float_data[5];
		_train_datas.at<float>(i, 6) = float_data[6];
		_train_datas.at<float>(i, 7) = float_data[7];

		_train_datas.at<float>(i, 8) = float_data[8];

		_responses.at<int>(i, 0) = int_data[0];
	}
	//---- set the input variable types ---
	Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE+1, 1, CV_8U);
	var_type.setTo(Scalar(CV_VAR_NUMERICAL)); // all inputs are numerical  
	var_type.at<uchar>(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL;
	//--- train ---
	cout << "training......." << endl;
	float priors[NUMBER_OF_CLASSES] = { 1, 1 };
	CvRTParams params = CvRTParams(25, // max depth  
	                       4, // min sample count  
	                       0, // regression accuracy: N/A here  
                           false, // compute surrogate split, no missing data  
		                   5, // max number of categories (use sub-optimal algorithm for larger numbers)  
	                        priors, // the array of priors  
	                        false,  // calculate variable importance  
		                    3,       // number of variables randomly selected at node and used to find the best split(s).  
							3,  // max number of trees in the forest  
                            0.01f,               // forest accuracy
	                        CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination criteria
		);
	CvRTrees* rtree = new CvRTrees;
	bool train_result = rtree->train(_train_datas, CV_ROW_SAMPLE, _responses,
		Mat(), Mat(), var_type, Mat(), params);
	if (train_result == false)
		cout << "random trees train failed!" << endl;
	
	cout << "predicting.........." << endl;
	//------- predict, build the result images, and save them ---------
	for (int k = 0; k < END - START + 1; k++)
	{
		IplImage* img_dst = cvCreateImage(cvGetSize(imgs[k][0]), IPL_DEPTH_8U, 1);
		uchar* ptr;
		for (int i = 0; i <HEIGHT ; i++)
		{
			ptr = (uchar*)img_dst->imageData + i*img_dst->widthStep;
			for (int j = 0; j < WIDTH; j++)
			{//读一行数据
				Mat test_data(1, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
				test_data.at<float>(0, 0) = cvGet2D(imgs[k][0], i, j).val[0];
				test_data.at<float>(0, 1) = cvGet2D(imgs[k][1], i, j).val[0];
				test_data.at<float>(0, 2) = cvGet2D(imgs[k][2], i, j).val[0];
				test_data.at<float>(0, 3) = cvGet2D(imgs[k][3], i, j).val[0];
			
				test_data.at<float>(0, 4) = cvGet2D(tex_imgs[k][0], i, j).val[0];
				test_data.at<float>(0, 5) = cvGet2D(tex_imgs[k][1], i, j).val[0];
				test_data.at<float>(0, 6) = cvGet2D(tex_imgs[k][2], i, j).val[0];
				test_data.at<float>(0, 7) = cvGet2D(tex_imgs[k][3], i, j).val[0];

				test_data.at<float>(0, 8) = k;
				// predict
				int result = rtree->predict(test_data, Mat());
				*(ptr + j) = result * 255;
			}
		}
		IplConvKernel* strel = cvCreateStructuringElementEx(5, 5, 2, 2, CV_SHAPE_ELLIPSE);
		cvErode(img_dst, img_dst, strel, 1);
		//cvDilate(img_dst, img_dst, strel, 1);
		
		cout << "save image  " << k + START << endl;
		char result_name[100];
		memset(result_name, 0, 100);
		sprintf(result_name, "BRATS_HG0005_RESULT/BRATS_HG0005_RESULT_%d.png", k+START);
		cvSaveImage(result_name, img_dst);
	}
	cout << "complete!!!" << endl;
	cvWaitKey(0);
	return 0;
}
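
RGB2GRAY() and LBP() are helpers not shown in this example. A minimal sketch of both (the 3x3, 8-neighbour LBP variant is an assumption; the original may differ):

static IplImage* RGB2GRAY(IplImage* src)
{
	// cvLoadImage returns a 3-channel BGR image by default
	IplImage* gray = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1);
	cvCvtColor(src, gray, CV_BGR2GRAY);
	return gray;
}

static void LBP(IplImage* src, IplImage* dst)
{
	cvSetZero(dst);
	for (int i = 1; i < src->height - 1; i++)
	{
		for (int j = 1; j < src->width - 1; j++)
		{
			uchar center = CV_IMAGE_ELEM(src, uchar, i, j);
			uchar code = 0;
			// compare the 8 neighbours against the center pixel
			code |= (CV_IMAGE_ELEM(src, uchar, i - 1, j - 1) >= center) << 7;
			code |= (CV_IMAGE_ELEM(src, uchar, i - 1, j    ) >= center) << 6;
			code |= (CV_IMAGE_ELEM(src, uchar, i - 1, j + 1) >= center) << 5;
			code |= (CV_IMAGE_ELEM(src, uchar, i,     j + 1) >= center) << 4;
			code |= (CV_IMAGE_ELEM(src, uchar, i + 1, j + 1) >= center) << 3;
			code |= (CV_IMAGE_ELEM(src, uchar, i + 1, j    ) >= center) << 2;
			code |= (CV_IMAGE_ELEM(src, uchar, i + 1, j - 1) >= center) << 1;
			code |= (CV_IMAGE_ELEM(src, uchar, i,     j - 1) >= center) << 0;
			CV_IMAGE_ELEM(dst, uchar, i, j) = code;
		}
	}
}
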
Example #19
void doCrossValidation( DataSet& data, TrainResult& result)
{
	// these variables are not needed - pass empty Mats
	Mat varIdx, missingDataMask;

//	BoostParams forestParams = cv::BoostParams(cv::Boost::DEFAULT, 100, 0.95, 5, false, 0 );

	Mat sampleIdx;
	int nFold = 5;
	result.train_hr = 0;
	result.test_hr = 0;
	result.fpRate = 0;
	result.fnRate = 0;

//	printf( "numSamples %d", data.numSamples);

	
	// define training/test-sets within trainData
	for(int round = 0; round < nFold; round++)
	{


		//define test and training sets
		float partTrain = 1.0/nFold;
		sampleIdx = Mat(1,data.numSamples,CV_8U,1.0);

		int negIdx = (int)floor(partTrain*data.numNeg);
		sampleIdx.colRange(negIdx*round, negIdx*(round+1)) = 0.0;


		int posIdx = (int)floor( partTrain*data.numPos );
		sampleIdx.colRange( data.numNeg+posIdx*round, data.numNeg + posIdx*(round+1)) = 0.0;

		//int numT = (cv::sum( sampleIdx ))[0];
		//printf("sample Idx sum (trainsamples): %d\n",numT);
		
		int numTestSamples = negIdx + posIdx;
		printf("numSamples: %d -- numTrainSamples: %d -- numTestSamples: %d\n",data.numSamples, data.numSamples-numTestSamples, numTestSamples );


		//training
		forest.train(data.data, CV_ROW_SAMPLE, data.responses, varIdx, sampleIdx, data.varType, missingDataMask, forestParams);


		//evaluation
		TrainResult roundResult;
		evaluation(forest, data, sampleIdx, roundResult);

		result.fnRate 	+= roundResult.fnRate;
		result.fpRate 	+= roundResult.fpRate;
		result.test_hr 	+= roundResult.test_hr;
		result.train_hr += roundResult.train_hr;
		if( round == 0 )
			result.var_importance = roundResult.var_importance.clone();
		else
			result.var_importance += roundResult.var_importance;

		printf( "Round %d.Recognition rate: train = %.2f%%, test = %.2f%% -- overall FN = %.2f%%, FP = %.2f%%\n",
				round, roundResult.train_hr*100., roundResult.test_hr*100. ,roundResult.fnRate*100. ,roundResult.fpRate*100.);
	}
	result.fnRate 	/= nFold;
	result.fpRate 	/= nFold;
	result.test_hr 	/= nFold;
	result.train_hr /= nFold;
	result.var_importance /= nFold;
	double sum = (cv::sum(result.var_importance))[0];
	result.var_importance /= sum;

	printf( "____\nRecognition rate: train = %.2f%%, test = %.2f%% -- overall FN = %.2f%%, FP = %.2f%%\n",
			result.train_hr*100., result.test_hr*100. ,result.fnRate*100. ,result.fpRate*100.);
}
Example #20
Int_t main()
{
    // Access ntuples
    TFile* file[2];
    file[0] = new TFile("~/SingleMuon_pT_501_500.root");
    file[1] = new TFile("~/SingleMuon_pT_200_150.root");
    TTree* tree[2];
    tree[0] = (TTree*)file[0]->Get("trees");
    tree[1] = (TTree*)file[1]->Get("trees");

    // Declare variables and set branch addresses
    Double_t ptR[2] = {0, 0}, ptER[2] = {0, 0}, chi2R[2] = {0, 0}, d0R[2] = {0, 0}, dXYR[2] = {0, 0}, dZR[2] = {0, 0}, d0ER[2] = {0, 0}, etaR[2] = {0, 0}, etaER[2] = {0, 0}, phiR[2] = {0, 0}, resXR[2] = {0, 0}, resYR[2] = {0, 0};
    Double_t ptG[2] = {0, 0}, etaG[2] = {0, 0}, phiG[2] = {0, 0};
    Double_t globalTrkX[2] = {0, 0}, globalTrkY[2] = {0, 0}, globalTrkZ[2] = {0, 0}, hitPosX[2] = {0, 0}, hitPosY[2] = {0, 0}, hitPosZ[2] = {0, 0}, transImpPar4RecHits[2] = {0, 0};
    Int_t foundR[2] = {0, 0}, lostR[2] = {0, 0}, ndofR[2] = {0, 0}, idG[2] = {0, 0}, eventN[2] = {0, 0}, nRepeats[2] = {0, 0}, nMuons[2] = {0, 0};
    Int_t muonHits[2] = {0, 0}, dtHits[2] = {0, 0}, cscHits[2] = {0, 0}, rpcHits[2] = {0, 0};	// Muon hits
    Int_t pixelHits[2] = {0, 0}, barrelHits[2] = {0, 0}, endcapHits[2] = {0, 0};	// Pixel hits
    Int_t stripHits[2] = {0, 0}, tibHits[2] = {0, 0}, tidHits[2] = {0, 0}, tobHits[2] = {0, 0}, tecHits[2] = {0, 0};	// Strip hits
    Bool_t hQualR[2] = {0, 0}, repeatFlag[2] = {0, 0};
    // Missing folder items in ntuple??

    Long64_t event[2];
    for (Int_t t = 0; t < 2; t++)
    {
	event[t] = tree[t]->GetEntries();
	tree[t]->SetBranchAddress("ptR", &ptR[t]);
	tree[t]->SetBranchAddress("ptER", &ptER[t]);
	tree[t]->SetBranchAddress("chi2R", &chi2R[t]);
	tree[t]->SetBranchAddress("d0R", &d0R[t]);
	tree[t]->SetBranchAddress("dXYR", &dXYR[t]);
	tree[t]->SetBranchAddress("dZR", &dZR[t]);
	tree[t]->SetBranchAddress("d0ER", &d0ER[t]);
	tree[t]->SetBranchAddress("foundR", &foundR[t]);
	tree[t]->SetBranchAddress("lostR", &lostR[t]);
	tree[t]->SetBranchAddress("etaR", &etaR[t]);
	tree[t]->SetBranchAddress("etaER", &etaER[t]);
	tree[t]->SetBranchAddress("phiR", &phiR[t]);
	tree[t]->SetBranchAddress("hQualR", &hQualR[t]);
	tree[t]->SetBranchAddress("ndofR", &ndofR[t]);
	tree[t]->SetBranchAddress("ptG", &ptG[t]);
	tree[t]->SetBranchAddress("etaG", &etaG[t]);
	tree[t]->SetBranchAddress("phiG", &phiG[t]);
	tree[t]->SetBranchAddress("idG", &idG[t]);
	tree[t]->SetBranchAddress("residualXR", &resXR[t]);
	tree[t]->SetBranchAddress("residualYR", &resYR[t]);
	tree[t]->SetBranchAddress("globalTrkX", &globalTrkX[t]);
	tree[t]->SetBranchAddress("globalTrkY", &globalTrkY[t]);
	tree[t]->SetBranchAddress("globalTrkZ", &globalTrkZ[t]);
	tree[t]->SetBranchAddress("numberOfValidMuonHits", &muonHits[t]);
	tree[t]->SetBranchAddress("numberOfValidPixelHits", &pixelHits[t]);
	tree[t]->SetBranchAddress("numberOfValidPixelBarrelHits", &barrelHits[t]);
	tree[t]->SetBranchAddress("numberOfValidPixelEndcapHits", &endcapHits[t]);
	tree[t]->SetBranchAddress("numberOfValidStripHits", &stripHits[t]);
	tree[t]->SetBranchAddress("numberOfValidStripTIBHits", &tibHits[t]);
	tree[t]->SetBranchAddress("numberOfValidStripTIDHits", &tidHits[t]);
	tree[t]->SetBranchAddress("numberOfValidStripTOBHits", &tobHits[t]);
	tree[t]->SetBranchAddress("numberOfValidStripTECHits", &tecHits[t]);
	tree[t]->SetBranchAddress("numberOfValidMuonDTHits", &dtHits[t]);
	tree[t]->SetBranchAddress("numberOfValidMuonCSCHits", &cscHits[t]);
	tree[t]->SetBranchAddress("numberOfValidMuonRPCHits", &rpcHits[t]);
	tree[t]->SetBranchAddress("eventN", &eventN[t]);
	tree[t]->SetBranchAddress("repeatFlag", &repeatFlag[t]);
	tree[t]->SetBranchAddress("numbRepeats", &nRepeats[t]);
	tree[t]->SetBranchAddress("numbMuons", &nMuons[t]);
	tree[t]->SetBranchAddress("hitPosX", &hitPosX[t]);
	tree[t]->SetBranchAddress("hitPosY", &hitPosY[t]);
	tree[t]->SetBranchAddress("hitPosZ", &hitPosZ[t]);
	tree[t]->SetBranchAddress("transImpPar4RecHits", &transImpPar4RecHits[t]);
    }

    // Forest parameters
    const Int_t VARS = 4;
    Int_t train_good = 200;	// out of 791
    Int_t train_bad = 200;	// out of 678
    Int_t max_trees = 50;
    Int_t max_depth = 15;
    Int_t nactive_vars = 0;	// 0 for sqrt(VARS)
    Int_t min_sample_count = 10;
    Float_t regression_accuracy = 0;
    Bool_t use_surrogates = false;
    Int_t max_categories = 2;
    Float_t priors[] = {1., 1.};
    Bool_t calc_var_importance = false;
    Float_t forest_accuracy = 0.01;
    Int_t termcrit_type = CV_TERMCRIT_ITER;	// CV_TERMCRIT_EPS or ITER

    // Create canvases
    TCanvas* c1 = new TCanvas("c1", "Histogram and ROC curve", 1280, 480);
    c1->Divide(2, 1);
    // Style settings must go to the individual pads; calls on the parent
    // canvas after Divide() do not propagate to the sub-pads
    c1->cd(1);
    gPad->SetGrid();
    c1->cd(2);
    gPad->SetGrid();
    gPad->SetLogx();
    gPad->SetLogy();
    TCanvas* c2 = new TCanvas("c2", "Feature histograms", 1280, 720);
    c2->Divide(2, 2);
    TFile* canvas = new TFile("canvas.root", "RECREATE");
    TLegend* legend = new TLegend(0.11, 0.7, 0.4, 0.89);

    // Create histogram and graph arrays
    const Int_t BINS = 101;
    Double_t xmin = -0.01;
    Double_t xmax = 1.01;
    TH1D* hist[2];
    TGraph* graph;
    hist[0] = new TH1D("h0", "Bad tracks", BINS, xmin, xmax);	// predictions for true bad tracks (flag == 1)
    hist[1] = new TH1D("h1", "Good tracks", BINS, xmin, xmax);	// predictions for true good tracks (flag == 0)
    TH1D* featH[2][4];

    // Blank placeholder histogram for graph axes
    TH2D* blank = new TH2D("g1", "ROC curve", 2, 0.3, 1.1, 2, 0.001, 1.1);
    blank->GetXaxis()->SetTitle("good efficiency");
    blank->GetYaxis()->SetTitle("bad efficiency");
    blank->SetStats(0);
//    theRNG().state = getTickCount();
    
    // Create training array
    const Int_t TRAIN = train_good + train_bad;
    Int_t resp[TRAIN];
    Float_t train[TRAIN][VARS];
    for (Int_t i = 0; i < train_good; i++)
    {
	tree[0]->GetEvent(i);
	resp[i] = 0;
//	train[i][0] = tibHits[0];
//	train[i][1] = tidHits[0];
//	train[i][2] = tobHits[0];
//	train[i][3] = tecHits[0];
	train[i][0] = ptR[0];
	train[i][1] = etaR[0];
	train[i][2] = phiR[0];
	train[i][3] = foundR[0];
    }
    for (Int_t i = 0; i < train_bad; i++)
    {
	tree[1]->GetEvent(i);
	resp[i + train_good] = 1;
//	train[i + train_good][0] = tibHits[1];
//	train[i + train_good][1] = tidHits[1];
//	train[i + train_good][2] = tobHits[1];
//	train[i + train_good][3] = tecHits[1]; 
	train[i + train_good][0] = ptR[1];
	train[i + train_good][1] = etaR[1];
	train[i + train_good][2] = phiR[1];
	train[i + train_good][3] = foundR[1];
    }
    Int_t tflag = CV_ROW_SAMPLE;
    Mat responses(TRAIN, 1, CV_32SC1, resp);
    Mat train_data(TRAIN, VARS, CV_32FC1, train);
/*
    for (Int_t i = 0; i < TRAIN; i++)
    {
	for (Int_t j = 0; j < VARS; j++)
	    cout << train[i][j] << "\t";
	cout << endl;
    }
*/
    // Create type mask
    Int_t var[VARS + 1];
    for (Int_t i = 0; i < VARS; i++)
    	var[i] = CV_VAR_ORDERED;
    var[VARS] = CV_VAR_CATEGORICAL;
    Mat var_type(VARS + 1, 1, CV_32SC1, var);
    var_type.convertTo(var_type, CV_8SC1);	// Convert to 8-bit ints
    
    // Create missing data mask
    Int_t miss_t[TRAIN][VARS];
    for (Int_t i = 0; i < TRAIN; i++)
    {
	for (Int_t j = 0; j < VARS; j++)
	    miss_t[i][j] = 0;
    }
    Mat missing_data_mask(TRAIN, VARS, CV_32SC1, miss_t);
    missing_data_mask.convertTo(missing_data_mask, CV_8UC1);
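    // (An all-zero mask marks no values as missing; passing an empty cv::Mat()
    // to train() below would have the same effect.)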

    // Index masks: all-ones masks select every variable and every training sample
    Mat var_idx = Mat::ones(VARS, 1, CV_8UC1);
    Mat sample_idx = Mat::ones(TRAIN, 1, CV_8UC1);

    // Train forest, print variable importance (if used)
    cout << "Trees: " << max_trees << endl;
    cout << "Depth: " << max_depth << endl;
    cout << "m: " << nactive_vars << endl;
    CvRTrees forest;
    forest.train(train_data, tflag, responses, var_idx, sample_idx, var_type, missing_data_mask,
		 CvRTParams(max_depth, min_sample_count, regression_accuracy,
			    use_surrogates, max_categories, priors,
			    calc_var_importance, nactive_vars, max_trees,
			    forest_accuracy, termcrit_type));
    if (calc_var_importance)
    {
	Mat imp = forest.getVarImportance();
	cout << endl << imp << endl << endl;
    }
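    // A trained CvRTrees inherits save()/load() from CvStatModel, so the
    // forest could be written to XML/YAML and reloaded instead of retraining
    // on every run; a minimal sketch (the file name is only illustrative):
    //
    //	forest.save("rforest.xml");
    //	CvRTrees reloaded;
    //	reloaded.load("rforest.xml");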

    // Create solving array and data mask
    Int_t solve_good = event[0] - train_good;
    Int_t solve_bad = event[1] - train_bad;
    const Int_t SOLVE = solve_good + solve_bad;
    Int_t flag[SOLVE];
    Float_t solve[SOLVE][VARS];
    for (Int_t i = 0; i < solve_good; i++)
    {
	tree[0]->GetEvent(i + train_good);
	flag[i] = 0;
//	solve[i][0] = tibHits[0];
//	solve[i][1] = tidHits[0];
//	solve[i][2] = tobHits[0];
//	solve[i][3] = tecHits[0];
	solve[i][0] = ptR[0];
	solve[i][1] = etaR[0];
	solve[i][2] = phiR[0];
	solve[i][3] = foundR[0];
    }
    for (Int_t i = 0; i < solve_bad; i++)
    {
	tree[1]->GetEvent(i + train_bad);
	flag[i + solve_good] = 1;
//	solve[i + solve_good][0] = tibHits[1];
//	solve[i + solve_good][1] = tidHits[1];
//	solve[i + solve_good][2] = tobHits[1];
//	solve[i + solve_good][3] = tecHits[1]; 
	solve[i + solve_good][0] = ptR[1];
	solve[i + solve_good][1] = etaR[1];
	solve[i + solve_good][2] = phiR[1];
	solve[i + solve_good][3] = foundR[1];
    }
    Int_t miss_s[SOLVE][VARS];
    for (Int_t i = 0; i < SOLVE; i++)
    {
	for (Int_t j = 0; j < VARS; j++)
	    miss_s[i][j] = 0;
    }
/*
    for (Int_t i = 0; i < SOLVE; i++)
    {
	for (Int_t j = 0; j < VARS; j++)
	    cout << solve[i][j] << "\t";
	cout << endl;
    }
*/
    // Split solving data into 1d matrices and process each
    Float_t prediction[SOLVE];
    for (Int_t i = 0; i < SOLVE; i++)
    {
	Mat sample(VARS, 1, CV_32FC1, solve[i]);
	Mat missing(VARS, 1, CV_32SC1, miss_s[i]);
	missing.convertTo(missing, CV_8UC1);
	prediction[i] = forest.predict_prob(sample, missing);
    }
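    // Note: predict_prob() is defined for two-class problems only and returns
    // the fraction of trees voting for the second class (here 1 = bad track);
    // plain predict() would return the majority-vote label instead, e.g.:
    //
    //	Float_t label = forest.predict(sample, missing);	// 0.0 or 1.0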

    // Fill the prediction histograms: hist[0] with the true bad tracks
    // (flag == 1), hist[1] with the true good tracks
    for (Int_t i = 0; i < SOLVE; i++)
    {	
	if (flag[i])
	    hist[0]->Fill(prediction[i]);
	else
	    hist[1]->Fill(prediction[i]);
    }
    
    // Count outcomes at a probability threshold of 0.5
    // (prediction > 0.5 classifies the track as bad)
    Double_t errors[4] = {0, 0, 0, 0};	// True pos, false pos, true neg, false neg
    for (Int_t i = 0; i < SOLVE; i++)
    {
	if (prediction[i] > 0.5)
	{
	    if (flag[i])
		errors[0]++;
	    else
		errors[1]++;
	}
	else
	{
	    if (!flag[i])
		errors[2]++;
	    else
		errors[3]++;
	}
    }

    Double_t perc_errors[] = {0, 0, 0, 0, 0};
    perc_errors[0] = errors[0] / (errors[0] + errors[1]) * 100;	// Truly bad among tracks classified bad
    perc_errors[1] = errors[1] / (errors[0] + errors[1]) * 100;	// Truly good among tracks classified bad
    perc_errors[2] = errors[2] / (errors[2] + errors[3]) * 100;	// Truly good among tracks classified good
    perc_errors[3] = errors[3] / (errors[2] + errors[3]) * 100;	// Truly bad among tracks classified good
    perc_errors[4] = (errors[1] + errors[3]) / (errors[0] + errors[1] + errors[2] + errors[3]) * 100;	// Overall misclassification rate
    cout << "At threshold 0.5:" << endl;
    cout << "True bad tracks: " << perc_errors[0] << "%" << endl;    
    cout << "False bad tracks: " << perc_errors[1] << "%" << endl;
    cout << "True good tracks: " << perc_errors[2] << "%" << endl;
    cout << "False good tracks: " << perc_errors[3] << "%" << endl;
    cout << "Combined errors: " << perc_errors[4] << "%" << endl << endl;
    
    // Draw histograms
    c1->cd(1);
//    hist[0]->SetLineColor(color[a]);
    hist[0]->SetStats(0);
    hist[0]->Draw();
    hist[1]->SetLineColor(2);
    hist[1]->SetStats(0);
    hist[1]->Draw("SAME");

    // Make ROC curve
    // Cumulative efficiencies: the fraction of each class whose bad-track
    // probability lies at or below each scanned threshold
    Double_t eff[2][BINS];
    for (Int_t b = 0; b < BINS; b++)
    {
	eff[0][b] = hist[0]->Integral(1, b + 1) / hist[0]->Integral();
	eff[1][b] = hist[1]->Integral(1, b + 1) / hist[1]->Integral();
    }
    c1->cd(2);
    blank->Draw();
    graph = new TGraph(BINS, eff[1], eff[0]);
//    graph->SetLineColor(color[a]);
    graph->Draw("L");
//    legend->AddEntry(graph, title[a].c_str(), "LPF");
//    legend->Draw();
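    // Optional summary number: the area under the plotted curve (bad
    // efficiency vs good efficiency, so smaller is better), estimated with
    // the trapezoid rule over the scanned threshold bins
    Double_t area = 0;
    for (Int_t b = 1; b < BINS; b++)
	area += 0.5 * (eff[0][b] + eff[0][b - 1]) * (eff[1][b] - eff[1][b - 1]);
    cout << "Area under bad-vs-good efficiency curve: " << area << endl;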

    // Create feature histograms
    featH[0][0] = new TH1D("feat 0 0", "ptR", 50, 0, 650);
    featH[1][0] = new TH1D("feat 1 0", "ptR", 50, 0, 650);
    featH[0][1] = new TH1D("feat 0 1", "etaR", 25, -0.1, 0.1);
    featH[1][1] = new TH1D("feat 1 1", "etaR", 25, -0.1, 0.1);
    featH[0][2] = new TH1D("feat 0 2", "phiR", 25, -0.1, 0.1);
    featH[1][2] = new TH1D("feat 1 2", "phiR", 25, -0.1, 0.1);
    featH[0][3] = new TH1D("feat 0 3", "foundR", 25, 0, 25);
    featH[1][3] = new TH1D("feat 1 3", "foundR", 25, 0, 25);
    
    for (Int_t h = 0; h < 2; h++)
    {
	for (Int_t i = 0; i < event[h]; i++)
	{
	    tree[h]->GetEvent(i);
	    featH[h][0]->Fill(ptR[h]);
	    featH[h][1]->Fill(etaR[h]);
	    featH[h][2]->Fill(phiR[h]);
	    featH[h][3]->Fill(foundR[h]);
	}
    }

    for (Int_t f = 0; f < 4; f++)
    {
	c2->cd(f + 1);
	featH[0][f]->Draw();
	featH[1][f]->SetLineColor(kRed);
	featH[1][f]->Draw("SAME");
    }

    c1->Write();
    c2->Write();
    canvas->Close();

    return 0;
}
int main( int argc, char** argv )
{
    // let's check the OpenCV version first

    printf ("OpenCV version %s (%d.%d.%d)\n",
            CV_VERSION,
            CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION);
    
    if(argc != 4)
    {
     	printf("Usage: %s file_training file_testing number_of_classes", argv[0]);
        exit(0);
    }

    //define number of training and testing samples and number of attributes
    int* results = find_parameters_from_csv(argv[1], argv[2]);
    
    int NUMBER_OF_TRAINING_SAMPLES = results[0] - 1;
    int NUMBER_OF_TESTING_SAMPLES = results[1] - 1;
    int ATTRIBUTES_PER_SAMPLE = results[2];

    int NUMBER_OF_CLASSES = atoi(argv[3]);

    printf("N° of training samples: %d \nN° testing of samples: %d \nN° of attributes: %d \nN° of classes: %d \n", NUMBER_OF_TRAINING_SAMPLES,NUMBER_OF_TESTING_SAMPLES,ATTRIBUTES_PER_SAMPLE,NUMBER_OF_CLASSES );

    // define training data storage matrices (one for attribute examples, one
    // for classifications)

    Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
    Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1);

    //define testing data storage matrices

    Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1);
    Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1);

    // define all the attributes as numerical
    // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL)
    // that can be assigned on a per attribute basis

    Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U );
    var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical

    // this is a classification problem (i.e. predict a discrete number of class
    // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL

    var_type.at<uchar>(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL;

    double result; // value returned from a prediction

    // load training and testing data sets

    if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE) &&
            read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE))
    {
        // define the parameters for training the random forest (trees)

        // weights of each classification for classes
        // (all equal, assuming equal numbers of samples per class)
        float priors[NUMBER_OF_CLASSES];
        for (int z = 0; z < NUMBER_OF_CLASSES; z++)
        {
            priors[z] = 1;	// give each class a weight of 1 to start
        }

        CvRTParams params = CvRTParams(25, // max depth
                                       2, // min sample count
                                       0, // regression accuracy: N/A here
                                       false, // compute surrogate split, no missing data
                                       15, // max number of categories (use sub-optimal algorithm for larger numbers)
                                       priors, // the array of priors
                                       false,  // calculate variable importance
                                       4,       // number of variables randomly selected at node and used to find the best split(s).
                                       100,	 // max number of trees in the forest
                                       0.01f, // forest accuracy
                                       CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination criteria
                                      );
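        // (Note: 4 active variables per node is hard-coded above; passing 0
        // would make OpenCV default to sqrt(number of attributes), the usual
        // random-forest heuristic.)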

        // train random forest classifier (using training data)

        printf( "\nUsing training database: %s\n\n", argv[1]);
        CvRTrees* rtree = new CvRTrees;

        rtree->train(training_data, CV_ROW_SAMPLE, training_classifications,
                     Mat(), Mat(), var_type, Mat(), params);

        // perform classifier testing and report results

        Mat test_sample;
        int correct_class = 0;
        int wrong_class = 0;
        int false_positives [NUMBER_OF_CLASSES];

	//initialize every element in false_positives to 0
	for (int z = 0; z < NUMBER_OF_CLASSES; z++)
        {
		false_positives[z] = 0;
	}

        printf( "\nUsing testing database: %s\n\n", argv[2]);

        for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++)
        {

            // extract a row from the testing matrix

            test_sample = testing_data.row(tsample);

            // run random forest prediction

            result = rtree->predict(test_sample, Mat());

            printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result);

            // compare the prediction with the (true) testing classification
            // (N.B. OpenCV uses a floating point decision tree implementation!)

            if (fabs(result - testing_classifications.at<float>(tsample, 0))
                    >= FLT_EPSILON)
            {
                // if they differ more than floating point error => wrong class

                wrong_class++;

                false_positives[(int) result]++;

            }
            else
            {

                // otherwise correct

                correct_class++;
            }
        }

        printf( "\nResults on the testing database: %s\n"
                "\tCorrect classification: %d (%g%%)\n"
                "\tWrong classifications: %d (%g%%)\n",
                argv[2],
                correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES,
                wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES);

        for (int i = 0; i < NUMBER_OF_CLASSES; i++)
        {
            printf( "\tClass (digit %d) false postives 	%d (%g%%)\n", i,
                    false_positives[i],
                    (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES);
        }
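        // Beyond per-class false-positive counts, a full confusion matrix
        // would show which classes get mixed up with which; a minimal sketch
        // (assumes the labels are the integers 0..NUMBER_OF_CLASSES-1, as in
        // this data set):
        //
        //	int confusion[NUMBER_OF_CLASSES][NUMBER_OF_CLASSES];
        //	for (int r = 0; r < NUMBER_OF_CLASSES; r++)
        //	    for (int c = 0; c < NUMBER_OF_CLASSES; c++)
        //		confusion[r][c] = 0;
        //	for (int t = 0; t < NUMBER_OF_TESTING_SAMPLES; t++)
        //	{
        //	    int truth = (int) testing_classifications.at<float>(t, 0);
        //	    int pred = (int) rtree->predict(testing_data.row(t), Mat());
        //	    confusion[truth][pred]++;
        //	}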


        // all matrix memory is freed by the Mat destructors
        delete rtree;


        // all OK : main returns 0

        return 0;
    }

    // not OK : main returns -1

    return -1;
}