Example #1
int main()
{
    const int train_sample_count = 300;

//#define LEPIOTA
#ifdef LEPIOTA
    const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data";
#else
    const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data";
#endif

    CvDTree dtree;
    CvBoost boost;
    CvRTrees rtrees;
    CvERTrees ertrees;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    data.read_csv( filename );

#ifdef LEPIOTA
    data.set_response_idx( 0 );     
#else
    data.set_response_idx( 21 );     
    data.change_var_type( 21, CV_VAR_CATEGORICAL );
#endif

    data.set_train_test_split( &spl );
    
    printf("======DTREE=====\n");
    dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR ), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );

#ifdef LEPIOTA
    printf("======BOOST=====\n");
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR ), 0 );
#endif

    printf("======RTREES=====\n");
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR ), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR ), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );

    return 0;
}
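This example resembles OpenCV's samples/c/tree_engine.cpp, and it calls a print_result helper that the snippet does not show. A minimal sketch consistent with the calls above (float train/test errors from calc_error, plus an optional variable-importance vector) might look like this:

// Hypothetical helper, reconstructed from its call sites above: prints the
// train/test errors returned by calc_error and, when available, the
// variable-importance vector returned by get_var_importance.
static void print_result( float train_err, float test_err, const CvMat* var_imp )
{
    printf( "train error: %f\n", train_err );
    printf( "test error: %f\n", test_err );
    if( var_imp )
    {
        bool is_float = CV_MAT_TYPE( var_imp->type ) == CV_32FC1;
        printf( "variable importance:\n" );
        for( int i = 0; i < var_imp->cols; i++ )
            printf( "%d: %.4f\n", i,
                is_float ? var_imp->data.fl[i] : (float)var_imp->data.db[i] );
    }
}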
Example #2
void CvRTDriver::createClassifier(const CvMat* data, const CvMat* responses, const CvMat* missing, float p_weight)
{
	CvMat* var_type;
	float priors[] = { 1, p_weight }; // weight the positive class by p_weight

	var_type = cvCreateMat(data->cols + 1, 1, CV_8U);

#ifdef CLASSIFY
	cvSet(var_type, cvScalarAll(CV_VAR_CATEGORICAL)); // all the variables are categorical
#else
	cvSet(var_type, cvScalarAll(CV_VAR_NUMERICAL)); // all the variables are numerical
#endif

	rtree = new CvRTrees;
	rtree->train(data, CV_ROW_SAMPLE, responses, 0, 0, var_type, missing,
		CvRTParams(20, // max depth
		10, // min sample count
		0.01f, // regression accuracy: N/A here
		false, // don't compute surrogate splits
		10, // max number of categories (use sub-optimal algorithm for larger numbers)
		priors, // the array of priors
		false,  // don't calculate variable importance
		0,       // number of variables randomly selected at a node to find the best split(s); 0 means sqrt(total)
		210,     // max number of trees in the forest
		0.01f,                // forest accuracy
		CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination criteria
		));

	cvReleaseMat(&var_type);
}
Example #3
CvRTrees* train_rf(CvMat* predictors, CvMat* labels)
{
	int stat[2];
	get_stat(labels, stat);
	printf("%d negative samples, %d positive samples\n", stat[0], stat[1]);
	
	const int tree_count = 500;
	const float priors[] = {0.25f,0.75f};
	CvRTrees* rtrees = new CvRTrees();
	CvRTParams rtparams = CvRTParams(5, 10, 0, false, 2, priors, true, 
									 (int)sqrt((float)predictors->cols), tree_count, 1e-6, 
									 CV_TERMCRIT_ITER + CV_TERMCRIT_EPS);
	CvMat* var_type = cvCreateMat(predictors->cols + 1, 1, CV_8UC1);
	// var_type elements are single bytes, so write uchars, not ints
	for(int i = 0; i < predictors->cols; i++)
	{
		CV_MAT_ELEM(*var_type, uchar, i, 0) = CV_VAR_NUMERICAL; // all predictors are numerical
	}
	CV_MAT_ELEM(*var_type, uchar, predictors->cols, 0) = CV_VAR_CATEGORICAL; // the response is categorical
	rtrees->train(predictors, CV_ROW_SAMPLE, labels, 0, 0, var_type, 0, rtparams);
	cvReleaseMat(&var_type);
	return rtrees;
}
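Example #3 also relies on a get_stat helper that is not shown. A minimal sketch, assuming labels is a one-column CV_32F matrix with 0/1 (negative/positive) classes:

// Hypothetical helper matching the call above: counts negative (stat[0])
// and positive (stat[1]) samples in a one-column float label matrix.
void get_stat(CvMat* labels, int stat[2])
{
	stat[0] = stat[1] = 0;
	for(int i = 0; i < labels->rows; i++)
		stat[labels->data.fl[i] > 0 ? 1 : 0]++;
}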
Example #4
RandomTrees::RandomTrees(const unsigned int no_of_features)
{
	this->is_modelfile_loaded_ = false;
	this->number_of_features_ = no_of_features;

	this->tree_parameters_ = CvRTParams(10, 10, 0, false, 10, 0, true, (int)sqrt((double)no_of_features), 100, 0.01f, CV_TERMCRIT_ITER);

	this->tree_parameters_.max_depth = INT_MAX;								// max levels in a tree
	this->tree_parameters_.min_sample_count = 10;							// don't split a node with fewer samples than this
	this->tree_parameters_.regression_accuracy = 0;
	this->tree_parameters_.use_surrogates = false;
	this->tree_parameters_.max_categories = 10;
	this->tree_parameters_.priors = 0;
	this->tree_parameters_.calc_var_importance = true;						// true for better evaluation
	this->tree_parameters_.nactive_vars = (int)sqrt((double)no_of_features);	// sqrt(number of features)
	//this->tree_parameters_.max_num_of_trees_in_the_forest;
	//this->tree_parameters_.forest_accuracy;
	//this->tree_parameters_.term_crit = CV_TERMCRIT_ITER;


	/*
    int _max_depth,
    int _min_sample_count,
    float _regression_accuracy,
    bool _use_surrogates,
    int _max_categories,
    const float* _priors,							NO PRIORS
    bool _calc_var_importance,
    int _nactive_vars,
    int max_num_of_trees_in_the_forest,
    float forest_accuracy,
    int termcrit_type
    */

}
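The constructor only fills tree_parameters_; the class's training code is not shown. A sketch of how the parameters would typically be consumed (the CvRTrees member name random_trees_ is an assumption, not from the source):

// Sketch only: shows where tree_parameters_ would be passed to CvRTrees.
void RandomTrees::train(const cv::Mat& samples, const cv::Mat& labels)
{
	// row-major samples, one label per row; var_idx/sample_idx/var_type
	// and the missing-data mask are left empty here
	this->random_trees_.train(samples, CV_ROW_SAMPLE, labels,
	                          cv::Mat(), cv::Mat(), cv::Mat(), cv::Mat(),
	                          this->tree_parameters_);
}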
Example #5
/** 
 * @author     	JIA Pei
 * @version    	2009-10-04
 * @brief      	Training
 * @param      	data     		Input - training data, one sample per row
 * @param		categories		Input - column vector of class labels
 * @return		void
*/
void CClassificationAlgs::Training(const Mat_<float>& data, const Mat_<int>& categories)
{
	unsigned int NbOfSamples = data.rows;
	set<int> ClassSet;
	for(int i = 0; i < categories.rows; i++)
	{
		ClassSet.insert(categories(i, 0));
	}
	this->m_iNbOfCategories = ClassSet.size();
	
	switch(this->m_iClassificationMethod)
	{
		case CClassificationAlgs::DecisionTree:
			this->m_CVDtree.train( 	data,
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvDTreeParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, false, false, 0 ) );
		break;
		case CClassificationAlgs::Boost:
		    this->m_CVBoost.train( 	data,
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvBoostParams(CvBoost::DISCRETE, 50, 0.95, INT_MAX, false, 0),
									false );
		break;
		case CClassificationAlgs::RandomForest:
			this->m_CVRTrees.train( data, 
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvRTParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ) );
		break;
		case CClassificationAlgs::ExtremeRandomForest:
			this->m_CVERTrees.train(data,
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvRTParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ) );
		break;
		case CClassificationAlgs::SVM:
			this->m_CVSVM.train(	data,
									categories,
									Mat(),
									Mat(),
									CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
									0, 1, 0,
									1, 0, 0,
									NULL, cvTermCriteria(CV_TERMCRIT_ITER, 1000, 1E-6) ) );
		break;
	}
}
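The Training method has no visible counterpart for prediction in the snippet. A hypothetical Classification method (name and signature assumed, not taken from the source) could dispatch the same way:

// Sketch only: a companion to Training, assumed rather than quoted from
// the source. Each branch calls the predict method of the trained model.
int CClassificationAlgs::Classification(const Mat_<float>& sample)
{
	switch(this->m_iClassificationMethod)
	{
		case CClassificationAlgs::DecisionTree:
			return (int) this->m_CVDtree.predict(sample)->value;
		case CClassificationAlgs::Boost:
			return (int) this->m_CVBoost.predict(sample);
		case CClassificationAlgs::RandomForest:
			return (int) this->m_CVRTrees.predict(sample);
		case CClassificationAlgs::ExtremeRandomForest:
			return (int) this->m_CVERTrees.predict(sample);
		case CClassificationAlgs::SVM:
			return (int) this->m_CVSVM.predict(sample);
		default:
			return -1;
	}
}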
Example #6
static
int build_rtrees_classifier( char* data_filename,
    char* filename_to_save, char* filename_to_load )
{
    CvMat* data = 0;
    CvMat* responses = 0;
    CvMat* var_type = 0;
    CvMat* sample_idx = 0;

    int ok = read_num_class_data( data_filename, 16, &data, &responses );
    int nsamples_all = 0, ntrain_samples = 0;
    int i = 0;
    double train_hr = 0, test_hr = 0;
    CvRTrees forest;
    CvMat* var_importance = 0;

    if( !ok )
    {
        printf( "Could not read the database %s\n", data_filename );
        return -1;
    }

    printf( "The database %s is loaded.\n", data_filename );
    nsamples_all = data->rows;
    ntrain_samples = (int)(nsamples_all*0.8);

    // Create or load Random Trees classifier
    if( filename_to_load )
    {
        // load classifier from the specified file
        forest.load( filename_to_load );
        ntrain_samples = 0;
        if( forest.get_tree_count() == 0 )
        {
            printf( "Could not read the classifier %s\n", filename_to_load );
            return -1;
        }
        printf( "The classifier %s is loaded.\n", data_filename );
    }
    else
    {
        // create classifier by using <data> and <responses>
        printf( "Training the classifier ...\n");

        // 1. create type mask
        var_type = cvCreateMat( data->cols + 1, 1, CV_8U );
        cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) );
        cvSetReal1D( var_type, data->cols, CV_VAR_CATEGORICAL );

        // 2. create sample_idx
        sample_idx = cvCreateMat( 1, nsamples_all, CV_8UC1 );
        {
            CvMat mat;
            cvGetCols( sample_idx, &mat, 0, ntrain_samples );
            cvSet( &mat, cvRealScalar(1) );

            cvGetCols( sample_idx, &mat, ntrain_samples, nsamples_all );
            cvSetZero( &mat );
        }

        // 3. train classifier
        forest.train( data, CV_ROW_SAMPLE, responses, 0, sample_idx, var_type, 0,
            CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER));
        printf( "\n");
    }

    // compute prediction error on train and test data
    for( i = 0; i < nsamples_all; i++ )
    {
        double r;
        CvMat sample;
        cvGetRow( data, &sample, i );

        r = forest.predict( &sample );
        r = fabs((double)r - responses->data.fl[i]) <= FLT_EPSILON ? 1 : 0;

        if( i < ntrain_samples )
            train_hr += r;
        else
            test_hr += r;
    }

    test_hr /= (double)(nsamples_all-ntrain_samples);
    train_hr /= (double)ntrain_samples;
    printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n",
            train_hr*100., test_hr*100. );

    printf( "Number of trees: %d\n", forest.get_tree_count() );

    // Print variable importance
    var_importance = (CvMat*)forest.get_var_importance();
    if( var_importance )
    {
        double rt_imp_sum = cvSum( var_importance ).val[0];
        printf("var#\timportance (in %%):\n");
        for( i = 0; i < var_importance->cols; i++ )
            printf( "%-2d\t%-4.1f\n", i,
            100.f*var_importance->data.fl[i]/rt_imp_sum);
    }

    // Print some proximities
    printf( "Proximities between some samples corresponding to the letter 'T':\n" );
    {
        CvMat sample1, sample2;
        const int pairs[][2] = {{0,103}, {0,106}, {106,103}, {-1,-1}};

        for( i = 0; pairs[i][0] >= 0; i++ )
        {
            cvGetRow( data, &sample1, pairs[i][0] );
            cvGetRow( data, &sample2, pairs[i][1] );
            printf( "proximity(%d,%d) = %.1f%%\n", pairs[i][0], pairs[i][1],
                forest.get_proximity( &sample1, &sample2 )*100. );
        }
    }

    // Save Random Trees classifier to file if needed
    if( filename_to_save )
        forest.save( filename_to_save );

    cvReleaseMat( &sample_idx );
    cvReleaseMat( &var_type );
    cvReleaseMat( &data );
    cvReleaseMat( &responses );

    return 0;
}
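Example #6 is the random-trees routine from OpenCV's samples/c/letter_recog.cpp; it depends on a read_num_class_data helper defined elsewhere in that file. Roughly (a sketch, not the verbatim helper), it parses lines such as "T,2,8,3,..." into a float feature matrix and a response vector holding the class letter's character code:

// Sketch of the helper assumed above: reads var_count comma-separated
// numeric attributes per line, preceded by a single class character.
static int read_num_class_data( const char* filename, int var_count,
                                CvMat** data, CvMat** responses )
{
    FILE* f = fopen( filename, "rt" );
    if( !f )
        return 0;

    std::vector<float> features, labels;
    char buf[4096];
    while( fgets( buf, sizeof(buf), f ))
    {
        char* ptr = buf;
        labels.push_back( (float)*ptr ); // class letter stored as its char code
        for( int i = 0; i < var_count && (ptr = strchr( ptr, ',' )) != 0; i++ )
            features.push_back( (float)atof( ++ptr ));
    }
    fclose( f );

    int nsamples = (int)labels.size();
    if( nsamples == 0 )
        return 0;
    *data = cvCreateMat( nsamples, var_count, CV_32F );
    *responses = cvCreateMat( nsamples, 1, CV_32F );
    memcpy( (*data)->data.fl, &features[0], features.size()*sizeof(float) );
    memcpy( (*responses)->data.fl, &labels[0], labels.size()*sizeof(float) );
    return 1;
}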
Example #7
int main(int argc, char** argv)
{
  // std::cout<<FLT_EPSILON<<std::endl; 
  cv::Mat training_data, training_labels,testing_data, testing_labels;
  
  training_data = read_rgbd_data_cv(argv[1],NUMBER_OF_TRAINING_SAMPLES);
  training_labels = read_rgbd_data_cv(argv[2], NUMBER_OF_TRAINING_SAMPLES);
  testing_data = read_rgbd_data_cv(argv[3],NUMBER_OF_TESTING_SAMPLES);
  testing_labels = read_rgbd_data_cv(argv[4], NUMBER_OF_TESTING_SAMPLES);
  
 
  printf("dataset specs: %d samples with %d features\n", training_data.rows, training_data.cols);

  // define all the attributes as numerical
  // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL)
  // that can be assigned on a per attribute basis

  cv::Mat var_type = cv::Mat(training_data.cols + 1, 1, CV_8U );
  var_type.setTo(cv::Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical
  var_type.at<uchar>(training_data.cols, 0) = CV_VAR_CATEGORICAL; // the labels are categorical

  /******************************** Step 1: define and initialize the Random Trees parameters ******************************/
  float priors[] = {1,1,1,1,1};  // per-class weights (all equal here)
  CvRTParams params = CvRTParams(25, // max depth
				 50, // min sample count
				 0, // regression accuracy: N/A here
				 false, // compute surrogate split, no missing data
				 15, // max number of categories (use sub-optimal algorithm for larger numbers)
				 priors, // the array of priors
				 false,  // calculate variable importance
				 20,       // number of variables randomly selected at node and used to find the best split(s).
				 NUMBER_OF_TREES,	 // max number of trees in the forest
				 0.01f,				// forest accuracy
				 CV_TERMCRIT_ITER |	CV_TERMCRIT_EPS // termination criteria
				 );
  
  /**************************** Step 2: train the Random Decision Forest (RDF) classifier *********************/
  // printf( "\nUsing training database: %s\n\n", argv[1]);
  CvRTrees* rtree = new CvRTrees;
  rtree->train(training_data, CV_ROW_SAMPLE, training_labels,
	       cv::Mat(), cv::Mat(), var_type, cv::Mat(), params);
  
  // perform classifier testing and report results
  cv::Mat test_sample, train_sample;
  int correct_class = 0;
  int wrong_class = 0;
  int result;
  int label;
  int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0};
  int false_negatives [NUMBER_OF_CLASSES] = {0,0,0,0,0};

  std::vector<CvDTreeNode*> leaf_nodes(training_data.rows); // a variable-length array here would be non-standard C++

  // record, for each training sample, the leaf it reaches in one tree of the forest
  for (int tsample = 0; tsample < training_data.rows; tsample++)
    {
      train_sample = training_data.row(tsample);
      CvForestTree* tree = rtree->get_tree(1);
      CvDTreeNode* leaf_node = tree->predict(train_sample, cv::Mat());
      leaf_nodes[tsample] = leaf_node; 
    }

  // printf( "\nUsing testing database: %s\n\n", argv[2]);

  for (int tsample = 0; tsample < testing_data.rows; tsample++)
    {	       
      // extract a row from the testing matrix
      test_sample = testing_data.row(tsample);
      // train on the testing data:
      // test_sample = training_data.row(tsample);
      /******************************** Step 3: predict *********************************************/

      result = (int) rtree->predict(test_sample, cv::Mat());
      label = (int) testing_labels.at<float>(tsample, 0);

      printf("Testing Sample %i -> class result (digit %d) - label (digit %d)\n", tsample, result, label);

      // get the leaf nodes of the first tree in the forest
      /*CvForestTree* tree = rtree->get_tree(0);
      std::list<const CvDTreeNode*> leaf_list;
      leaf_list = get_leaf_node( tree );
      printf("Number of Leaf nodes: %ld\n", leaf_list.size());*/

      // if the prediction and the (true) testing classification are the same
      // (N.B. openCV uses a floating point decision tree implementation!)
      if (fabs(result - label) >= FLT_EPSILON)
	{
	  // if they differ more than floating point error => wrong class
	  wrong_class++;
	  false_positives[(int) result]++;
	  false_negatives[(int) testing_labels.at<float>(tsample, 0)]++;
	}
      else
	{
	  // otherwise correct
	  correct_class++;
	}
    }

  printf( // "\nResults on the testing database: %s\n"
	 "\tCorrect classification: %d (%g%%)\n"
	 "\tWrong classifications: %d (%g%%)\n",
	 // argv[2],
	 correct_class, (double) correct_class*100/testing_data.rows,
	 wrong_class, (double) wrong_class*100/testing_data.rows);

  for (int i = 0; i < NUMBER_OF_CLASSES; i++)
    {
      printf( "\tClass (digit %d) false postives 	%d (%g%%)\n\t                false negatives  %d (%g%%)\n", i,
	      false_positives[i],
	      (double) false_positives[i]*100/testing_data.rows,
	      false_negatives[i],
	      (double) false_negatives[i]*100/testing_data.rows);
    }

	// get all the leaf nodes in the forest
   for (int i = 0; i < NUMBER_OF_TREES; i ++)
	{ 
      	CvForestTree* tree = rtree->get_tree(i);
      	std::list<const CvDTreeNode*> leaf_list;
      	leaf_list = get_leaf_node( tree );
	}
  	//get training_sample indices for leaf nodes
  std::list<leaf_samples> node_indices;
  for (int i = 0; i < training_data.rows; i++) 
    {
      CvDTreeNode* leaf_node = leaf_nodes[i];

      if (leaf_node != NULL) 
	  {
		leaf_samples leaf_sample;
		leaf_sample.leaf = leaf_node;
		leaf_sample.indices.push_front(i);
		printf("\nValue of leaf: %f\n", leaf_node->value);
		printf("Smaple indices for leaf:\n");
		printf(" %d", i);

		for (int j=i+1; j < training_data.rows; j++) 
	  	{
	    	if (leaf_node == leaf_nodes[j])
			{
	      		leaf_sample.indices.push_front(j);
	      		printf(" %lu", j);
	      		leaf_nodes[j] = NULL;
	    	}
	  	}
		node_indices.push_front(leaf_sample);      
      }
    }
  	printf("\nSize of node_indices: %d\n", node_indices.size()); 
	//get labels and features
	

  // get raw pointers to the features (the matrices are CV_32F, so read floats)
  const float* p = testing_data.ptr<float>(0);
  std::vector<float> vec(p, p + testing_data.cols);

  
  // all matrix memory free by destructors

  // all OK : main returns 0
  // result = rtree->predict(testing_data.row(79), cv::Mat());
  // float andi = result - testing_labels.at<float>(79, 0);
  // // std::cout<<training_labels.row(0).col(0)<<std::endl;
  // std::cout<<andi<<std::endl;
  return 0;
}
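Example #7 refers to a leaf_samples struct and a get_leaf_node helper that the snippet does not define. Minimal sketches consistent with how they are used above:

// Hypothetical definitions matching their use in Example #7.
struct leaf_samples
{
	CvDTreeNode* leaf;        // a leaf node of some tree
	std::list<int> indices;   // training-sample indices that reach it
};

// Walks one tree of the forest and collects every childless node.
std::list<const CvDTreeNode*> get_leaf_node( CvForestTree* tree )
{
	std::list<const CvDTreeNode*> leaves;
	std::vector<const CvDTreeNode*> stack;
	stack.push_back( tree->get_root() );
	while( !stack.empty() )
	{
		const CvDTreeNode* node = stack.back();
		stack.pop_back();
		if( !node->left )              // leaves have no children
			leaves.push_back( node );
		else
		{
			stack.push_back( node->left );
			stack.push_back( node->right );
		}
	}
	return leaves;
}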
Example #8
void OpencvRFclassifier::learn(std::vector< std::vector<float> >& pfeatures, std::vector<int>& plabels){

     if (_rf){
	delete _rf;
	_trees.clear();	
	_tree_weights.clear();
     }
     _rf = new CvRTrees;
 	

     int rows = pfeatures.size();
     int cols = pfeatures[0].size();	 	
     
     printf("Number of samples and dimensions: %d, %d\n",rows, cols);
     if ((rows<1)||(cols<1)){
	return;
     }

//      clock_t start = clock();    
     std::time_t start, end;
     std::time(&start);	


     CvMat *features = cvCreateMat(rows, cols, CV_32F);	
     CvMat *labels = cvCreateMat(rows, 1 , CV_32F);	
     float* datap = features->data.fl;
     float* labelp = labels->data.fl;	 	


     int numzeros=0; 	
     for(int i=0; i < rows; i++){
	 labelp[i] = plabels[i];
	 numzeros += ( labelp[i] == -1? 1 : 0 );
	 for(int j=0; j < cols ; j++){
	     datap[i*cols+j]  = (float)pfeatures[i][j];	
	 }
     }
     printf("Number of merge: %d\n",numzeros);



     // 1. create type mask
     CvMat* var_type = cvCreateMat( features->cols + 1, 1, CV_8U );
     cvSet( var_type, cvScalarAll(CV_VAR_NUMERICAL) );
     cvSetReal1D( var_type, features->cols, CV_VAR_CATEGORICAL );

     // define the parameters for training the random forest (trees)

     float priors[] = {1,1};  // weights of the two classes (equal here)

     CvRTParams params = CvRTParams( _max_depth, // max depth of each tree
                                     10, // min sample count: don't split a node with fewer samples than this
                                     0, // regression accuracy: N/A here
                                     false, // compute surrogate split, no missing data
                                     15, // max number of categories (use sub-optimal algorithm for larger numbers)
                                     priors, // the array of priors for each class
                                     false,  // calculate variable importance
                                     5,       // number of variables randomly selected at node and used to find the best split(s)
                                     _tree_count,	 // max number of trees in the forest
                                     0.001f, // forest accuracy
                                     CV_TERMCRIT_ITER //| CV_TERMCRIT_EPS // termination criteria
                                      );
	

     // 3. train classifier
     _rf->train( features, CV_ROW_SAMPLE, labels, 0, 0, var_type, 0, params);
         //CvRTParams(10,10,0,false,15,0,true,4,100,0.01f,CV_TERMCRIT_ITER));

     float correct = 0;
     for(int  i = 0; i < features->rows ; i++ ){
         float r;
         CvMat sample;
         cvGetRow( features, &sample, i );
	
         r = _rf->predict_prob( &sample );
	 r = (r>0.5)? 1 :-1;
         r = fabs((float)r - labels->data.fl[i]) <= FLT_EPSILON ? 1 : 0;


	 correct += r;
      }
      	

    std::time(&end);
    printf("Time required to learn RF: %.2f sec\n", (difftime(end,start))*1.0);
//     printf("Time required to learn RF: %.2f sec\n", ((float)clock() - start) / CLOCKS_PER_SEC);
    printf("with training set accuracy :%.3f\n", correct/features->rows*100.);

    _tree_count = _rf->get_tree_count();	
    for(int i = 0; i < _tree_count; i++){
	CvForestTree* treep = _rf->get_tree(i);
	_trees.push_back(treep); 	
    }
    //int ntrees = _rf->get_tree_count();	
    _tree_weights.resize(_tree_count, 1.0/_tree_count); 		

     cvReleaseMat( &features );
     cvReleaseMat( &labels );	
     cvReleaseMat( &var_type );	
}
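learn() caches the forest's trees in _trees and gives each an equal weight in _tree_weights, which suggests a weighted per-tree vote at prediction time. A minimal sketch of such a predict method (this method is an assumption, not shown in the source; CvRTrees::predict/predict_prob would also work directly):

// Sketch only: a weighted vote over the cached trees, assuming labels
// are -1/+1 as in learn() above.
float OpencvRFclassifier::predict(std::vector<float>& pfeatures)
{
	CvMat* sample = cvCreateMat(1, (int)pfeatures.size(), CV_32F);
	for (size_t j = 0; j < pfeatures.size(); j++)
		sample->data.fl[j] = pfeatures[j];

	double vote = 0;
	for (size_t t = 0; t < _trees.size(); t++)
	{
		CvDTreeNode* leaf = _trees[t]->predict(sample);
		vote += _tree_weights[t] * leaf->value; // leaf->value holds the class label
	}
	cvReleaseMat(&sample);
	return vote > 0 ? 1.0f : -1.0f;
}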
Example #9
int main()
{
    const int train_sample_count = 300;
    bool is_regression = false;

    const char* filename = "data/waveform.data";
    int response_idx = 21;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    if(data.read_csv(filename) != 0)
    {
        printf("couldn't read %s\n", filename);
        exit(1); // exit with a failure status
    }

    data.set_response_idx(response_idx);
    data.change_var_type(response_idx, CV_VAR_CATEGORICAL);
    data.set_train_test_split( &spl );

    const CvMat* values = data.get_values();
    const CvMat* response = data.get_responses();
    const CvMat* missing = data.get_missing();
    const CvMat* var_types = data.get_var_types();
    const CvMat* train_sidx = data.get_train_sample_idx();
    const CvMat* var_idx = data.get_var_idx();
    CvMat* response_map;
    CvMat* ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL);
    int num_classes = response_map->cols;
    
    CvDTree dtree;
    printf("======DTREE=====\n");
    CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0);
    dtree.train( &data, cvd_params);
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );

#if 0
    /* boosted trees are only implemented for two classes */
    printf("======BOOST=====\n");
    CvBoost boost;
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 );
#endif

    printf("======RTREES=====\n");
    CvRTrees rtrees;
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    CvERTrees ertrees;
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );

    printf("======GBTREES=====\n");
    CvGBTrees gbtrees;
    CvGBTreesParams gbparams;
    gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression
    gbtrees.train( &data, gbparams);
    
    //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx);
    print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0);

    printf("======KNEAREST=====\n");
    CvKNearest knearest;
    //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
    //                const Mat& _sample_idx, bool _is_regression,
    //                int _max_k, bool _update_base )
    bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
    assert(is_classifier);
    int max_k = 10;
    knearest.train(values, response, train_sidx, is_regression, max_k, false);

    CvMat* new_response = cvCreateMat(response->rows, 1, values->type);
    //print_types();

    //const CvMat* train_sidx = data.get_train_sample_idx();
    knearest.find_nearest(values, max_k, new_response, 0, 0, 0);

    print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR),
                 knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0);

    printf("======== RBF SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm1;
    CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm1.train(values, response, train_sidx, var_idx, params1);
    svm1.train_auto(values, response, var_idx, train_sidx, params1);
    svm_print_error(&svm1, values, response, response_idx, train_sidx);

    printf("======== Linear SVM =======\n");
    CvMySVM svm2;
    CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm2.train(values, response, train_sidx, var_idx, params2);
    svm2.train_auto(values, response, var_idx, train_sidx, params2);
    svm_print_error(&svm2, values, response, response_idx, train_sidx);

    printf("======NEURONAL NETWORK=====\n");

    int num_layers = 3;
    CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(num_layers, sizeof(int))); // CV_32SC1 elements are ints
    cvmSetI(&layers, 0, 0, values->cols-1);
    cvmSetI(&layers, 0, 1, num_classes);
    cvmSetI(&layers, 0, 2, num_classes);
    CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0);
    CvANN_MLP_TrainParams ann_params;
    //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP;
    CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes);

    CvMat values2 = cvmat_remove_column(values, response_idx);
    ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000);
    //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000);

    ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx);

#if 0 /* slow */

    printf("======== Polygonal SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm3;
    CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY,
                                     /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm3.train(values, response, train_sidx, var_idx, params3);
    svm3.train_auto(values, response, var_idx, train_sidx, params3);
    svm_print_error(&svm3, values, response, response_idx, train_sidx);
#endif

    return 0;
}
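Example #9 uses several helpers defined elsewhere in its source (cv_preprocess_categories, cvmat_make_boolean_class_columns, cvmat_remove_column, the *_print_error functions, cvmSetI, CvMySVM). Two of the simpler ones, sketched under the assumption that cvmSetI just writes an int element and CvMySVM only re-exposes CvSVM:

// Hypothetical helpers assumed by Example #9, not shown in its source.
static inline void cvmSetI( CvMat* mat, int row, int col, int value )
{
    mat->data.i[row * mat->cols + col] = value; // mat is CV_32SC1
}

class CvMySVM : public CvSVM
{
    // assumed to exist only to expose protected CvSVM internals
};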
Example #10
File: aertrees.cpp  Project: glo/ee384b
int CV_ERTreesTest :: train( int test_case_idx )
{
    int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS, NACTIVE_VARS, MAX_TREES_NUM;
    float REG_ACCURACY = 0, OOB_EPS = 0.0;
    bool USE_SURROGATE, IS_PRUNED;
    const char* data_name = ((CvFileNode*)cvGetSeqElem( data_sets_names, test_case_idx ))->data.str.ptr;     

    // read validation params
    CvFileStorage* fs = ts->get_file_storage();
    CvFileNode* fnode = cvGetFileNodeByName( fs, 0, "validation" ), *fnode1 = 0;
    fnode = cvGetFileNodeByName( fs, fnode, name );
    fnode = cvGetFileNodeByName( fs, fnode, data_name );
    fnode = cvGetFileNodeByName( fs, fnode, "model_params" );
    fnode1 = cvGetFileNodeByName( fs, fnode, "max_depth" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "MAX_DEPTH can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MAX_DEPTH = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "min_sample_count" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "MIN_SAMPLE_COUNT can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MIN_SAMPLE_COUNT = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "use_surrogate" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "USE_SURROGATE can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    USE_SURROGATE = (fnode1->data.i != 0);
    fnode1 = cvGetFileNodeByName( fs, fnode, "max_categories" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "MAX_CATEGORIES can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MAX_CATEGORIES = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "cv_folds" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "CV_FOLDS can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    CV_FOLDS = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "is_pruned" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "IS_PRUNED can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    IS_PRUNED = (fnode1->data.i != 0);
    fnode1 = cvGetFileNodeByName( fs, fnode, "nactive_vars" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "NACTIVE_VARS can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    NACTIVE_VARS = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "max_trees_num" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "MAX_TREES_NUM can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MAX_TREES_NUM = fnode1->data.i;

    if ( !ertrees->train( &data, CvRTParams( MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY,
            USE_SURROGATE, MAX_CATEGORIES, 0, false, // calc_var_importance == false: skip variable importance
            NACTIVE_VARS, MAX_TREES_NUM, OOB_EPS, CV_TERMCRIT_ITER)) )
    {
        ts->printf( CvTS::LOG, "in test case %d model training  was failed", test_case_idx );
        return CvTS::FAIL_INVALID_OUTPUT;
    }
    return CvTS::OK;
}