Example #1
void svm_train(const FeatureCollection& features, const ICDAR2011DataSet& dataset, vector<int>& false_idx) {
	size_t feature_count = features.size();
	int validation_feature_count = 5000;
    size_t dim = features[0].featureArray.size();
	Mat train_data(feature_count - validation_feature_count, dim, CV_32F);
    Mat train_label(feature_count - validation_feature_count, 1, CV_32F);
	
	for(int feature_idx = 0; feature_idx < feature_count - validation_feature_count; feature_idx++) {
		float sum_norm = 0;
		if (NORM) 
		    sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0);
		else {
		    for (int i = 0;i< dim;i++) 
		        sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i];
		    sum_norm = sqrt(sum_norm);
		}
		for(int dim_idx = 0; dim_idx < dim; dim_idx++) {
			train_data.at<float>(feature_idx, dim_idx) = features[feature_idx].featureArray[dim_idx] / sum_norm;
		}
		train_label.at<float>(feature_idx) = features[feature_idx].label;
    }
	
	std::cout<<"svm train-------------------------------------"<<std::endl; 
	CvSVM SVM;
	CvBoost boost;  
	CvBoostParams params = CvBoostParams(CvBoost::REAL, 50, 0.95, 5, false, 0 );	
	boost.train(train_data, CV_ROW_SAMPLE,train_label, Mat(), Mat(), Mat(),Mat(),params);
	
	std::cout<<"svm validad-----------------------------------"<<std::endl;
    // Calculate trainning error
	cout<<feature_count<<endl;

	Mat test_data(1, dim, CV_32F);
    float predict_correct = 0, positive_cnt = 0, negative_cnt = 0;
	for(int feature_idx = feature_count - validation_feature_count; feature_idx < feature_count; feature_idx++) {
        float sum_norm = 0 ;
        if (NORM) 
            sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0);
        else {
            for (int i = 0;i< dim;i++) 
                sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i];
            sum_norm = sqrt(sum_norm);
        }
        
        for(int dim_idx = 0; dim_idx < dim; dim_idx++) {
            test_data.at<float>(0, dim_idx) = features[feature_idx].featureArray[dim_idx]/sum_norm;
        }
		
		if(features[feature_idx].label == boost.predict(test_data)) {
			predict_correct++;
		} else {
			false_idx.push_back(feature_idx);
		}
        positive_cnt += features[feature_idx].label == 1 ? 1 : 0;
        negative_cnt += features[feature_idx].label == 0 ? 1 : 0;
    }
	cout << "Training accuracy:" << predict_correct / 5000 << " pos_cnt:" << positive_cnt << " neg_cnt:" << negative_cnt << endl;
	boost.save((dataset.model_dir + "boost.model").c_str());
}
int main(int argc, char** argv) {

//Read the data from csv file
CvMLData cvml;
cvml.read_csv("char_datasetNM2.csv");
//Indicate which column is the response
cvml.set_response_idx(0);


//Select 80% of the samples for training 
CvTrainTestSplit cvtts(0.8f, true);
//Assign the division to the data
cvml.set_train_test_split(&cvtts);

CvBoost boost;

ifstream ifile("./trained_classifierNM2.xml");
if (ifile) 
{
	//The file exists, so we don't want to retrain 
	printf("Found trained_classifierNM2.xml file, remove it if you want to retrain with new data ... \n");
	boost.load("./trained_classifierNM2.xml", "boost");
} else {
	//Train with 100 weak classifiers
	printf("Training ... \n");
	boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false);
}

//Calculate the test and train errors
std::vector<float> train_responses, test_responses;
float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses);
float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses);
printf("Error train %f \n", fl1);
printf("Error test %f \n", fl2);


//Try a char
static const float arr[] = {0,0.870690,0.096485,2.000000,2.000000,0.137080,1.269940,2.000000};
vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false );
float votes      = boost.predict( Mat(sample), Mat(), Range::all(), false, true );

printf("\n The char sample is predicted as: %f (with number of votes = %f)\n", prediction,votes);
printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n", (float)1-(float)1/(1+exp(-2*votes)));

//Try a NONchar
//static const float arr2[] = {0,1.500000,0.072162,0.000000,8.000000,0.188095,1.578947,16.000000};
static const float arr2[] = {0,0.565217,0.103749,1.000000,2.000000,0.032258,1.525692,10.000000};
vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) );
prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false );
votes      = boost.predict( Mat(sample2), Mat(), Range::all(), false, true );

printf("\n The non_char sample is predicted as: %f (with number of votes = %f)\n", prediction,votes);
printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n\n", (float)1-(float)1/(1+exp(-2*votes)));

// Save the trained classifier
boost.save("./trained_classifierNM2.xml", "boost");

return EXIT_SUCCESS;
}
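The logistic correction printed above maps the raw vote sum from CvBoost::predict (returnSum=true) to a class probability. A minimal helper capturing that mapping — the function name is ours, not part of the sample — might look like this:

#include <cmath>

// Hypothetical helper: converts the raw vote sum returned by
// CvBoost::predict(..., returnSum = true) into a probability using the
// logistic correction P(r|character) = 1 - 1/(1 + exp(-2*sum)).
static float votes_to_probability(CvBoost& boost, const cv::Mat& sample)
{
    float votes = boost.predict(sample, cv::Mat(), cv::Range::all(), false, true);
    return 1.0f - 1.0f / (1.0f + std::exp(-2.0f * votes));
}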
Example #3
int main()
{
    const int train_sample_count = 300;

//#define LEPIOTA
#ifdef LEPIOTA
    const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data";
#else
    const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data";
#endif

    CvDTree dtree;
    CvBoost boost;
    CvRTrees rtrees;
    CvERTrees ertrees;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    data.read_csv( filename );

#ifdef LEPIOTA
    data.set_response_idx( 0 );     
#else
    data.set_response_idx( 21 );     
    data.change_var_type( 21, CV_VAR_CATEGORICAL );
#endif

    data.set_train_test_split( &spl );
    
    printf("======DTREE=====\n");
    dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() );

#ifdef LEPIOTA
    printf("======BOOST=====\n");
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 );
#endif

    printf("======RTREES=====\n");
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() );

    return 0;
}
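print_result() is not shown in this snippet; a minimal sketch compatible with the calls above (train error, test error, optional variable importance from get_var_importance()) could look like the following — an illustration, not the original helper:

static void print_result( float train_err, float test_err, const CvMat* _var_imp )
{
    printf( "train error    %f\n", train_err );
    printf( "test error     %f\n", test_err );
    if( _var_imp )
    {
        cv::Mat var_imp( _var_imp );  // wraps the CvMat header, no data copy
        printf( "variable importance:\n" );
        for( int i = 0; i < (int)var_imp.total(); i++ )
            printf( "  %d: %f\n", i, var_imp.type() == CV_32F ?
                    (double)var_imp.at<float>(i) : var_imp.at<double>(i) );
    }
    printf( "\n" );
}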
Example #4
int SvmTest( const ICDAR2011DataSet& dataset, FeatureCollection& features, const vector<int>& ProposalLen, const vector< vector<Rect> >& Proposal ) {
	long sumProposal = accumulate( ProposalLen.begin(), ProposalLen.end(), 0L );
	assert( sumProposal == features.size() );
	
	CvBoost boost;  
	boost.load((dataset.model_dir + "boost.model").c_str());
	
	long StartIndex = 0;  // running offset of the current image's features in the flat collection
	for( size_t image_idx = 0; image_idx < ProposalLen.size(); image_idx++ ) {
		string filename = dataset.test_set[ image_idx ];
		string resultPath = dataset.result_dir + CmFile::GetFileNameWithoutExtension( filename ) + ".txt"; 
		cout<<"result save path "<<resultPath<<" "<<image_idx<<" of "<<dataset.test_num<<endl;

		ofstream score_out( resultPath.c_str() );
		int num = ProposalLen[ image_idx ];
		vector<Rect>proposal = Proposal[ image_idx ];
		assert( num == proposal.size() );

		for (int i = 0; i < num; i++) {
			FeatureAtPoint featurePoint = features[ i + StartIndex ];
			int dim = featurePoint.featureArray.size();

			Mat test_data(1, dim, CV_32F);
			double sum_norm = 0;
			if (NORM) {
				sum_norm = accumulate(featurePoint.featureArray.begin(),featurePoint.featureArray.end(),0.0);
			} else {
				for (int k = 0; k < dim; k++)  
					sum_norm += featurePoint.featureArray[k] * featurePoint.featureArray[k];
				sum_norm = sqrt(sum_norm);
			}
			for (int dim_idx = 0; dim_idx < dim; dim_idx++) {
				test_data.at<float>(0, dim_idx) = featurePoint.featureArray[dim_idx] / sum_norm;
			}
			double score = boost.predict(test_data,Mat(),Range::all(),false,true);
	//		score = score * (-1);
			score_out<< proposal[i].x << " " << proposal[i].y << " "
				<< proposal[i].width << " "  << proposal[i].height << " "
				<< score << endl;
		}                
		StartIndex += num;
		score_out.close();
	}
	return 0;
}
Example #5
// Note: only two-class problems are supported here; anything else throws an exception
void Model::Predict_boosting( const SampleSet& samples, SampleSet& outError )
{
	int true_resp = 0;
	CvBoost *model = (CvBoost*)m_pModel;

	for (int i = 0; i < samples.N(); i++)
	{
		float ret = model->predict(samples.GetSampleAt(i), cv::Mat(), cv::Range::all());
		if (ret != samples.GetLabelAt(i))
		{
			outError.Add(samples.GetSampleAt(i), samples.GetLabelAt(i));
		}
		else
		{
			true_resp++;
		}
	}
	printf("%d %d",samples.N(), true_resp);
}
Example #6
int main(int argc, char** argv) {

	generateData();

	/* STEP 2. Opening the file */
	//1. Declare a structure to keep the data
	CvMLData cvml;
	//2. Read the file
	cvml.read_csv("samples.csv");
	//3. Indicate which column is the response
	cvml.set_response_idx(0);

	/* STEP 3. Splitting the samples */
	//1. Select the first 15 samples for training
	CvTrainTestSplit cvtts(15, true);
	//2. Assign the division to the data
	cvml.set_train_test_split(&cvtts);

	printf("Training ... ");
	/* STEP 4. The training */
	//1. Declare the classifier
	CvBoost boost;
	//2. Train it with 100 weak classifiers
	boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false);

	/* STEP 5. Calculating the testing and training error */
	// 1. Declare a couple of vectors to save the predictions of each sample
	vector<float> train_responses; 
	vector<float> test_responses;
	// 2. Calculate the training error
	float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses);
	// 3. Calculate the test error
	float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses);
	printf("Error train %f \n", fl1);
	printf("Error test %f \n", fl2);

	/* STEP 6. Save your classifier */
	// Save the trained classifier
	boost.save("./trained_boost.xml", "boost");

	return EXIT_SUCCESS;
}
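generateData() is defined elsewhere; a minimal stand-in that writes samples.csv in the layout the rest of the program expects (response in column 0, features after it) could look like this — the feature count and value ranges are illustrative assumptions:

#include <cstdlib>
#include <fstream>

// Hypothetical stand-in for generateData(): writes a two-class toy set to
// samples.csv with the label in column 0, matching set_response_idx(0).
void generateData()
{
    std::ofstream out("samples.csv");
    for (int i = 0; i < 40; i++) {
        bool positive = (i % 2 == 0);
        float center = positive ? 1.0f : -1.0f;
        out << (positive ? 1 : 0);                  // response column
        for (int j = 0; j < 3; j++)                 // three toy features
            out << "," << center + ((float)rand() / RAND_MAX - 0.5f);
        out << "\n";
    }
}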
Example #7
void find_decision_boundary_BT()
{
    img.copyTo( imgDst );

    Mat trainSamples, trainClasses;
    prepare_train_data( trainSamples, trainClasses );

    // learn classifier
    CvBoost  boost;

    Mat var_types( 1, trainSamples.cols + 1, CV_8UC1, Scalar(CV_VAR_ORDERED) );
    var_types.at<uchar>( trainSamples.cols ) = CV_VAR_CATEGORICAL;

    CvBoostParams  params( CvBoost::DISCRETE, // boost_type
                           100, // weak_count
                           0.95, // weight_trim_rate
                           2, // max_depth
                           false, //use_surrogates
                           0 // priors
                         );

    boost.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), var_types, Mat(), params );

    Mat testSample(1, 2, CV_32FC1 );
    for( int y = 0; y < img.rows; y += testStep )
    {
        for( int x = 0; x < img.cols; x += testStep )
        {
            testSample.at<float>(0) = (float)x;
            testSample.at<float>(1) = (float)y;

            int response = (int)boost.predict( testSample );
            circle( imgDst, Point(x,y), 2, classColors[response], 1 );
        }
    }
}
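prepare_train_data(), img, classColors and testStep belong to the surrounding points-classification demo and are not shown. A sketch of the helper, assuming the clicked points and their labels live in two globals (trainedPoints, trainedPointsMarkers — hypothetical names):

// Hypothetical globals from the surrounding demo:
//   vector<Point> trainedPoints;          // 2-D training points
//   vector<int>   trainedPointsMarkers;   // class label per point
static void prepare_train_data( Mat& samples, Mat& classes )
{
    Mat( trainedPoints ).copyTo( samples );         // one Point per row (2 channels)
    Mat( trainedPointsMarkers ).copyTo( classes );  // one label per row
    samples = samples.reshape( 1, samples.rows );   // split x/y into two columns
    samples.convertTo( samples, CV_32FC1 );         // float features for the trainer
}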
int main(int argc, char** argv) {

/* STEP 2. Opening the file */
//1. Declare a structure to keep the data
CvMLData cvml;

//2. Read the file
cvml.read_csv("groups_dataset.csv");
//cvml.read_csv("strokes_dataset_noresized.csv");

//3. Indicate which column is the response
cvml.set_response_idx(0);


/* STEP 3. Splitting the samples */
//1. Select 90% for training (an integer count is also allowed here)
CvTrainTestSplit cvtts(0.9f, true);
//2. Assign the division to the data
cvml.set_train_test_split(&cvtts);

/* STEP 4. The training */
//1. Declare the classifier
CvBoost boost;

ifstream ifile("./trained_boost_groups.xml");
if (ifile) 
{
	// The file exists, so we don't need to train 
	boost.load("./trained_boost_groups.xml", "boost");
} else {
	//2. Train it with 500 weak classifiers
	printf("Training ... \n");
	boost.train(&cvml, CvBoostParams(CvBoost::REAL, 500, 0, 1, false, 0), false);
}

/* STEP 5. Calculating the testing and training error */
// 1. Declare a couple of vectors to save the predictions of each sample
std::vector<float> train_responses, test_responses;
// 2. Calculate the training error
float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses);
// 3. Calculate the test error
float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses);
printf("Error train %f \n", fl1);
printf("Error test %f \n", fl2);

static const float arr[] = {0,-1.980394,1.249858,-0.631116,2.819193,0.305448,0.108346,0.801116,0.104873,0.130908,0.559806,0.255053,0.455610,0.294118,0.455645,1.549193,0.087770,0.144896,1.650866};
vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false );
float votes      = boost.predict( Mat(sample), Mat(), Range::all(), false, true );

printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction,votes);

//static const float arr2[] = {0,0.911369,1.052156,1.154478,3.321924,0.829768,0.249785,0.616930,0.246637,0.399782,0.337159,0.103893,0.308142,0.666667,0.745356,1.118034,0.009747,0.011016,1.130162};
static const float arr2[] = {0,1.14335,3.00412,2.62747,3.26428,2.32749,0.713018,0.47244,0.289846,0.613508,0.40514,0.216716,0.53305,0.878788,3.21698,3.6607,0.0422318,0.114392,2.70868};
vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) );
float prediction2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, false );
float votes2      = boost.predict( Mat(sample2), Mat(), Range::all(), false, true );

printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction2,votes2);

/* STEP 6. Save your classifier */
// Save the trained classifier
boost.save("./trained_boost_groups.xml", "boost");

return EXIT_SUCCESS;
}
static
int build_boost_classifier( char* data_filename,
    char* filename_to_save, char* filename_to_load )
{
    const int class_count = 26;
    CvMat* data = 0;
    CvMat* responses = 0;
    CvMat* var_type = 0;
    CvMat* temp_sample = 0;
    CvMat* weak_responses = 0;

    int ok = read_num_class_data( data_filename, 16, &data, &responses );
    int nsamples_all = 0, ntrain_samples = 0;
    int var_count;
    int i, j, k;
    double train_hr = 0, test_hr = 0;
    CvBoost boost;

    if( !ok )
    {
        printf( "Could not read the database %s\n", data_filename );
        return -1;
    }

    printf( "The database %s is loaded.\n", data_filename );
    nsamples_all = data->rows;
    ntrain_samples = (int)(nsamples_all*0.5);
    var_count = data->cols;

    // Create or load Boosted Tree classifier
    if( filename_to_load )
    {
        // load classifier from the specified file
        boost.load( filename_to_load );
        ntrain_samples = 0;
        if( !boost.get_weak_predictors() )
        {
            printf( "Could not read the classifier %s\n", filename_to_load );
            return -1;
        }
        printf( "The classifier %s is loaded.\n", data_filename );
    }
    else
    {
        // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        //
        // As currently boosted tree classifier in MLL can only be trained
        // for 2-class problems, we transform the training database by
        // "unrolling" each training sample as many times as the number of
        // classes (26) that we have.
        //
        // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

        CvMat* new_data = cvCreateMat( ntrain_samples*class_count, var_count + 1, CV_32F );
        CvMat* new_responses = cvCreateMat( ntrain_samples*class_count, 1, CV_32S );

        // 1. unroll the database type mask
        printf( "Unrolling the database...\n");
        for( i = 0; i < ntrain_samples; i++ )
        {
            float* data_row = (float*)(data->data.ptr + data->step*i);
            for( j = 0; j < class_count; j++ )
            {
                float* new_data_row = (float*)(new_data->data.ptr +
                                new_data->step*(i*class_count+j));
                for( k = 0; k < var_count; k++ )
                    new_data_row[k] = data_row[k];
                new_data_row[var_count] = (float)j;
                new_responses->data.i[i*class_count + j] = responses->data.fl[i] == j+'A';
            }
        }

        // 2. create type mask
        var_type = cvCreateMat( var_count + 2, 1, CV_8U );
        cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) );
        // the last indicator variable, as well
        // as the new (binary) response are categorical
        cvSetReal1D( var_type, var_count, CV_VAR_CATEGORICAL );
        cvSetReal1D( var_type, var_count+1, CV_VAR_CATEGORICAL );

        // 3. train classifier
        printf( "Training the classifier (may take a few minutes)...\n");
        boost.train( new_data, CV_ROW_SAMPLE, new_responses, 0, 0, var_type, 0,
            CvBoostParams(CvBoost::REAL, 100, 0.95, 5, false, 0 ));
        cvReleaseMat( &new_data );
        cvReleaseMat( &new_responses );
        printf("\n");
    }

    temp_sample = cvCreateMat( 1, var_count + 1, CV_32F );
    weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F ); 

    // compute prediction error on train and test data
    for( i = 0; i < nsamples_all; i++ )
    {
        int best_class = 0;
        double max_sum = -DBL_MAX;
        double r;
        CvMat sample;
        cvGetRow( data, &sample, i );
        for( k = 0; k < var_count; k++ )
            temp_sample->data.fl[k] = sample.data.fl[k];

        for( j = 0; j < class_count; j++ )
        {
            temp_sample->data.fl[var_count] = (float)j;
            boost.predict( temp_sample, 0, weak_responses );
            double sum = cvSum( weak_responses ).val[0];
            if( max_sum < sum )
            {
                max_sum = sum;
                best_class = j + 'A';
            }
        }

        r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0;

        if( i < ntrain_samples )
            train_hr += r;
        else
            test_hr += r;
    }

    test_hr /= (double)(nsamples_all-ntrain_samples);
    train_hr /= (double)ntrain_samples;
    printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n",
            train_hr*100., test_hr*100. );

    printf( "Number of trees: %d\n", boost.get_weak_predictors()->total );

    // Save classifier to file if needed
    if( filename_to_save )
        boost.save( filename_to_save );

    cvReleaseMat( &temp_sample );
    cvReleaseMat( &weak_responses );
    cvReleaseMat( &var_type );
    cvReleaseMat( &data );
    cvReleaseMat( &responses );

    return 0;
}
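read_num_class_data() (used here and in the AdaBoost examples below) parses a comma-separated file whose first field is a one-character class label followed by var_count numeric attributes, as in the UCI letter-recognition data. An approximate sketch, modeled on the OpenCV letter_recog.cpp helper this code appears to derive from; treat the details as assumptions:

#include <cstdio>
#include <cstring>
#include <vector>

static int read_num_class_data( const char* filename, int var_count,
                                CvMat** data, CvMat** responses )
{
    const int M = 1024;
    char buf[M+2];
    FILE* f = fopen( filename, "rt" );
    if( !f )
        return 0;

    std::vector<float> rows;  // flat buffer: label + var_count values per sample
    for(;;)
    {
        if( !fgets( buf, M, f ) || !strchr( buf, ',' ) )
            break;
        rows.push_back( (float)buf[0] );  // one-character class label, e.g. 'A'
        char* ptr = buf + 2;
        int i, n = 0;
        for( i = 0; i < var_count; i++, ptr += n + 1 )
        {
            float val = 0;
            if( sscanf( ptr, "%f%n", &val, &n ) != 1 )
                break;
            rows.push_back( val );
        }
        if( i < var_count )
            break;  // malformed line: stop; the partial row is dropped below
    }
    fclose( f );

    int nsamples = (int)(rows.size() / (var_count + 1));
    *data = cvCreateMat( nsamples, var_count, CV_32F );
    *responses = cvCreateMat( nsamples, 1, CV_32F );
    for( int i = 0; i < nsamples; i++ )
    {
        const float* src = &rows[i*(var_count + 1)];
        (*responses)->data.fl[i] = src[0];
        memcpy( (*data)->data.fl + i*var_count, src + 1, var_count*sizeof(float) );
    }
    return 1;
}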
	void setPartFilter(const char* filename) {
		partFilter.load(filename, "hog");
	}
Example #11
int main(int argc, char** argv)
{
    cv::FileStorage fs(argv[1], cv::FileStorage::READ);
    CvBoost classifier;
    classifier.read(*fs, *fs["classifier"]);

    // Fetch features
    H5File h5f = open_feature_file(argv[2]);
    vector<string> names = feature_names(h5f);
    int num_features = names.size();

    // Find image size
    Size imsize;
    read_feature_size(h5f, imsize, names[0].c_str());

    // figure out how many chunks to break into
    int row_block_size = imsize.height / (imsize.height / 1024) + 1;
    int col_block_size = imsize.width / (imsize.width / 1024) + 1;

    // Output image
    Mat prediction(imsize, CV_32FC1);
    Rect fullrect(0, 0, imsize.width, imsize.height);

    for (int basecol = 0; basecol < imsize.width; basecol += col_block_size) {
        for (int baserow = 0; baserow < imsize.height; baserow += row_block_size) {
            cout << basecol << " " << baserow << endl;
            Rect roi(basecol, baserow, col_block_size, row_block_size);
            roi &= fullrect;

            // Stack columns
            Mat stacked_features(roi.width * roi.height, num_features, CV_32F);
            for (int fnum = 0; fnum < num_features; fnum++) {
                Mat feature;
                read_feature(h5f, feature, names[fnum].c_str(), roi);
                feature.reshape(0, roi.width * roi.height).copyTo(stacked_features.col(fnum));
            }
            
            Mat submat = prediction(roi);
            int stacked_row_offset = 0;
            for (int outrow = 0; outrow < roi.height; outrow++) {
                float *dest = submat.ptr<float>(outrow);
                for (int outcol = 0; outcol < roi.width; outcol++, stacked_row_offset++, dest++) {
                    float sum = classifier.predict(stacked_features.row(stacked_row_offset), Mat(), Range::all(), false, true);
                    // cout << sum << " " << 1 / (1 + exp(-sum)) << endl;
                    *dest = 1 / (1 + exp(-sum));
                }
            }
        }
    }

    if (argc == 3) {
        normalize(prediction, prediction, 0, 1, NORM_MINMAX);
        imshow("result", prediction);
        waitKey(0);
    } else {
        H5File h5fout = create_feature_file(argv[3], prediction);
        write_feature(h5fout, prediction, "probabilities");
        for (int fnum = 0; fnum < num_features; fnum++) {
            if (names[fnum].find("membrane") != string::npos) {
                Mat feature;
                read_feature(h5f, feature, names[fnum].c_str());
                write_feature(h5fout, feature, names[fnum].c_str());
            }
        }
    }
}
Example #12
int AdaBoost::train(const char* samples_filename, const char* model_filename, const double ratio, double &train_error, double &test_error)
{
    CvMat* data = 0;
    CvMat* responses = 0;
    CvMat* var_type = 0;
    CvMat* temp_sample = 0;
    CvMat* weak_responses = 0;

	int ok = read_num_class_data( samples_filename, this->number_of_features_, &data, &responses );
	int nsamples_all = 0, ntrain_samples = 0;
	int var_count = 0;
	int i=0, j=0, k=0;
	double train_hr = 0, test_hr = 0;
	CvBoost boost;


	if( !ok )
	{
		cout << "Could not read the sample in" << samples_filename << endl;;
		return -1;
	}

	cout << "The sample file " << samples_filename << " is loaded." << endl;
	nsamples_all = data->rows;
	ntrain_samples = (int)(nsamples_all * ratio);
	var_count = data->cols;

	// create classifier by using <data> and <responses>
	cout << "Training the classifier ..." << endl;

   // create classifiers
	CvMat* new_data = cvCreateMat(ntrain_samples * this->number_of_classes_, var_count + 1 , CV_32F );//+1
	CvMat* new_responses = cvCreateMat( ntrain_samples * this->number_of_classes_, 1, CV_32S );

	// unroll the database type mask
	printf( "Unrolling the samples ...\n");

	for( i = 0; i < ntrain_samples; i++ )
	{
		float* data_row = (float*)(data->data.ptr + data->step*i);

		for( j = 0; j < this->number_of_classes_; j++ )
		{
			float* new_data_row = (float*)(new_data->data.ptr + new_data->step*(i * this->number_of_classes_ + j));

			for( k = 0; k < var_count; k++ )
				new_data_row[k] = data_row[k];

			new_data_row[var_count] = (float)j;
			new_responses->data.i[i * this->number_of_classes_ + j] = responses->data.fl[i] == j + FIRST_LABEL;
		}
	}

	// create type mask
	var_type = cvCreateMat( var_count + 2, 1, CV_8U );
	cvSet( var_type, cvScalarAll(CV_VAR_ORDERED));

	// the last indicator variable, as well
	// as the new (binary) response are categorical
	cvSetReal1D( var_type, var_count, CV_VAR_CATEGORICAL );//CV_VAR_CATEGORICAL CV_VAR_NUMERICAL
	cvSetReal1D( var_type, var_count+1, CV_VAR_CATEGORICAL ); //CV_VAR_CATEGORICAL

	// train classifier
	//printf( "training the classifier (may take a few minutes)...");
	boost.train( new_data, CV_ROW_SAMPLE, new_responses, 0, 0, var_type, 0, this->boost_parameters_);

	cvReleaseMat( &new_data );
	cvReleaseMat( &new_responses );
	//printf("\n");

	temp_sample = cvCreateMat( 1, var_count + 1, CV_32F );
	weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F );

	// compute prediction error on train and test data
	for( i = 0; i < nsamples_all; i++ )
	{
		int best_class = 0;
		double max_sum = -DBL_MAX;
		double r;
		CvMat sample;
		cvGetRow( data, &sample, i );

		for( k = 0; k < var_count; k++ )
			temp_sample->data.fl[k] = sample.data.fl[k];

		for( j = 0; j < this->number_of_classes_; j++ )
		{
			temp_sample->data.fl[var_count] = (float)j;

			boost.predict( temp_sample, 0, weak_responses );
			double sum = cvSum( weak_responses ).val[0];

			if( max_sum < sum )
			{
				max_sum = sum;
				best_class = j + FIRST_LABEL;
			}
		}

		r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0;

		if( i < ntrain_samples )
			train_hr += r;
		else
			test_hr += r;
	}

	train_hr /= (double)ntrain_samples;
	test_hr /= ((double)nsamples_all - (double)ntrain_samples);

	cout << "Recognition rate: train = " << train_hr * 100 << ", test = " << test_hr * 100 << endl;

	// fill result-parameters
	train_error = 1 - train_hr;
	test_error = 1 - test_hr;

	// Save classifier to file if needed
	if( model_filename )
		boost.save( model_filename );

	boost.clear();
	cvReleaseMat( &temp_sample );
	cvReleaseMat( &weak_responses );
	cvReleaseMat( &var_type );
	cvReleaseMat( &data );
	cvReleaseMat( &responses );

	return 0;
}
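A hypothetical call site for the method above (the AdaBoost construction is an assumption; the class definition with number_of_features_, number_of_classes_ and boost_parameters_ is not shown):

AdaBoost ada;
double train_err = 0, test_err = 0;
if (ada.train("samples.csv", "boost.xml", 0.8, train_err, test_err) == 0)
    printf("train error %f, test error %f\n", train_err, test_err);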
Example #13
int AdaBoost::test(const char* sample_filename, const char* model_filename, double &test_error)
{
	CvMat* data = 0;
	CvMat* responses = 0;
	CvMat* var_type = 0;
	CvMat* temp_sample = 0;
	CvMat* weak_responses = 0;

	int ok = 0;
	int nsamples_all = 0;
	int var_count;
	int i, j, k;
	double test_hr = 0;
	CvBoost boost;

	ok = read_num_class_data( sample_filename, this->number_of_features_, &data, &responses );

	if( !ok )
	{
		printf( "Could not read the test-file %s\n", sample_filename );
		return -1;
	}

	printf( "The test-file %s is loaded.\n", sample_filename );

	nsamples_all = data->rows;
	var_count = data->cols;

	cout << "no. of test samples: " << nsamples_all << std::endl;
	cout << "no. of features: " <<  var_count << std::endl;
	cout << "no. of classifiers: " <<  this->number_of_classes_ << std::endl;

	// load classifier from the specified file
	boost.load( model_filename );

	if( !boost.get_weak_predictors() )
	{
		printf( "Could not read the classifier %s\n", model_filename );
		return -1;
	}

	//printf( "The classifier %s is loaded.\n", filename_to_load );

	temp_sample = cvCreateMat( 1, var_count + 1, CV_32F );
	weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F );

	// compute prediction error on test data
	for( i = 0; i < nsamples_all; i++ )
	{
		int best_class = 0;
		double max_sum = -DBL_MAX;
		double r;
		CvMat sample;
		cvGetRow( data, &sample, i );

		for( k = 0; k < var_count; k++ )
			temp_sample->data.fl[k] = sample.data.fl[k];

		for( j = 0; j < this->number_of_classes_; j++ )
		{
			temp_sample->data.fl[var_count] = (float)j;

			boost.predict( temp_sample, 0, weak_responses );
			double sum = cvSum( weak_responses ).val[0];

			if( max_sum < sum )
			{
				max_sum = sum;
				best_class = j + FIRST_LABEL;
			}
		}

		r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0;

		test_hr += r;
	}

	test_hr /= (double) nsamples_all;

	test_error = 1 - test_hr;

	boost.clear();
	cvReleaseMat( &temp_sample );
	cvReleaseMat( &weak_responses );
	cvReleaseMat( &var_type );
	cvReleaseMat( &data );
	cvReleaseMat( &responses );

	return 0;
}
Example #14
int main()
{
    const int train_sample_count = 300;
    bool is_regression = false;

    const char* filename = "data/waveform.data";
    int response_idx = 21;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    if(data.read_csv(filename) != 0)
    {
        printf("couldn't read %s\n", filename);
        exit(0);
    }

    data.set_response_idx(response_idx);
    data.change_var_type(response_idx, CV_VAR_CATEGORICAL);
    data.set_train_test_split( &spl );

    const CvMat* values = data.get_values();
    const CvMat* response = data.get_responses();
    const CvMat* missing = data.get_missing();
    const CvMat* var_types = data.get_var_types();
    const CvMat* train_sidx = data.get_train_sample_idx();
    const CvMat* var_idx = data.get_var_idx();
    CvMat*response_map;
    CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL);
    int num_classes = response_map->cols;
    
    CvDTree dtree;
    printf("======DTREE=====\n");
    CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0);
    dtree.train( &data, cvd_params);
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );

#if 0
    /* boosted trees are only implemented for two classes */
    printf("======BOOST=====\n");
    CvBoost boost;
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 );
#endif

    printf("======RTREES=====\n");
    CvRTrees rtrees;
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    CvERTrees ertrees;
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );

    printf("======GBTREES=====\n");
    CvGBTrees gbtrees;
    CvGBTreesParams gbparams;
    gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression
    gbtrees.train( &data, gbparams);
    
    //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx);
    print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0);

    printf("======KNEAREST=====\n");
    CvKNearest knearest;
    //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
    //                const Mat& _sample_idx, bool _is_regression,
    //                int _max_k, bool _update_base )
    bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL;
    assert(is_classifier);
    int max_k = 10;
    knearest.train(values, response, train_sidx, is_regression, max_k, false);

    CvMat* new_response = cvCreateMat(response->rows, 1, values->type);
    //print_types();

    //const CvMat* train_sidx = data.get_train_sample_idx();
    knearest.find_nearest(values, max_k, new_response, 0, 0, 0);

    print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR),
                 knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0);

    printf("======== RBF SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm1;
    CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm1.train(values, response, train_sidx, var_idx, params1);
    svm1.train_auto(values, response, var_idx, train_sidx, params1);
    svm_print_error(&svm1, values, response, response_idx, train_sidx);

    printf("======== Linear SVM =======\n");
    CvMySVM svm2;
    CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR,
                                     /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm2.train(values, response, train_sidx, var_idx, params2);
    svm2.train_auto(values, response, var_idx, train_sidx, params2);
    svm_print_error(&svm2, values, response, response_idx, train_sidx);

    printf("======NEURONAL NETWORK=====\n");

    int num_layers = 3;
    CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1));
    cvmSetI(&layers, 0, 0, values->cols-1);
    cvmSetI(&layers, 0, 1, num_classes);
    cvmSetI(&layers, 0, 2, num_classes);
    CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0);
    CvANN_MLP_TrainParams ann_params;
    //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP;
    CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes);

    CvMat values2 = cvmat_remove_column(values, response_idx);
    ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000);
    //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000);

    ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx);

#if 0 /* slow */

    printf("======== Polygonal SVM =======\n");
    //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows);
    CvMySVM svm3;
    CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY,
                                     /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1,
                                     /*nu*/0, /*p*/0, /*class_weights*/0,
                                     cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    //svm3.train(values, response, train_sidx, var_idx, params3);
    svm3.train_auto(values, response, var_idx, train_sidx, params3);
    svm_print_error(&svm3, values, response, response_idx, train_sidx);
#endif

    return 0;
}
Example #15
int main(int argc, char** argv) {

/* STEP 2. Opening the file */
//1. Declare a structure to keep the data
CvMLData cvml;

//2. Read the file
cvml.read_csv("char_dataset.csv");
//cvml.read_csv("strokes_dataset_noresized.csv");

//3. Indicate which column is the response
cvml.set_response_idx(0);


/* STEP 3. Splitting the samples */
//1. Select 90% for training (an integer value is also allowed here)
CvTrainTestSplit cvtts(0.9f, true);
//2. Assign the division to the data
cvml.set_train_test_split(&cvtts);

/* STEP 4. The training */
//1. Declare the classifier
CvBoost boost;

ifstream ifile("./trained_boost_char.xml");
if (ifile) 
{
	// The file exists, so we don't need to train 
	boost.load("./trained_boost_char.xml", "boost");
} else {
	//2. Train it with only 2 weak classifiers (a quick smoke test)
	printf("Training ... \n");
	boost.train(&cvml, CvBoostParams(CvBoost::REAL, 2, 0, 1, false, 0), false);
}
cout<<"after train"<<endl;

/* STEP 5. Calculating the testing and training error */
// 1. Declare a couple of vectors to save the predictions of each sample
std::vector<float> train_responses, test_responses;
// 2. Calculate the training error
float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses);
// 3. Calculate the test error
float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses);
printf("Error train %f \n", fl1);
printf("Error test %f \n", fl2);


//Try a char
static const float arr[] = {0,1.659899,0.684169,0.412175,150.000000,81.000000,0.540000,0.358025,0.151203,0.000000,0.000000};

vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false );
float votes      = boost.predict( Mat(sample), Mat(), Range::all(), false, true );

printf("\n The sample (360) is predicted as: %f (with number of votes = %f)\n", prediction,votes);

//Try a NONchar
static const float arr2[] = {0,1.250000,0.433013,0.346410,9.000000,8.000000,0.888889,0.833333,0.375000,0.000000,0.000000};

vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) );
prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false );
votes      = boost.predict( Mat(sample2), Mat(), Range::all(), false, true );

printf("\n The sample (367) is predicted as: %f (with number of votes = %f)\n", prediction,votes);

/* STEP 6. Save your classifier */
// Save the trained classifier
boost.save("./trained_boost_char.xml", "boost");

return EXIT_SUCCESS;
}