示例#1
0
int AdaBoost::train(const char* samples_filename, const char* model_filename, const double ratio, double &train_error, double &test_error)
{
    CvMat* data = 0;
    CvMat* responses = 0;
    CvMat* var_type = 0;
    CvMat* temp_sample = 0;
    CvMat* weak_responses = 0;

	int ok = read_num_class_data( samples_filename, this->number_of_features_, &data, &responses );
	int nsamples_all = 0, ntrain_samples = 0;
	int var_count = 0;
	int i=0, j=0, k=0;
	double train_hr = 0, test_hr = 0;
	CvBoost boost;


	if( !ok )
	{
		cout << "Could not read the sample in" << samples_filename << endl;;
		return -1;
	}

	cout << "The sample file " << samples_filename << " is loaded." << endl;
	nsamples_all = data->rows;
	ntrain_samples = (int)(nsamples_all * ratio);
	var_count = data->cols;

	// create classifier by using <data> and <responses>
	cout << "Training the classifier ..." << endl;

   // create classifiers
	CvMat* new_data = cvCreateMat(ntrain_samples * this->number_of_classes_, var_count + 1 , CV_32F );//+1
	CvMat* new_responses = cvCreateMat( ntrain_samples * this->number_of_classes_, 1, CV_32S );

	// unroll the database type mask
	printf( "Unrolling the samples ...\n");

	for( i = 0; i < ntrain_samples; i++ )
	{
		float* data_row = (float*)(data->data.ptr + data->step*i);

		for( j = 0; j < this->number_of_classes_; j++ )
		{
			float* new_data_row = (float*)(new_data->data.ptr + new_data->step*(i * this->number_of_classes_ + j));

			for( k = 0; k < var_count; k++ )
				new_data_row[k] = data_row[k];

			new_data_row[var_count] = (float)j;
			new_responses->data.i[i * this->number_of_classes_ + j] = responses->data.fl[i] == j + FIRST_LABEL;
		}
	}

	// create type mask
	var_type = cvCreateMat( var_count + 2, 1, CV_8U );
	cvSet( var_type, cvScalarAll(CV_VAR_ORDERED));

	// the last indicator variable, as well
	// as the new (binary) response are categorical
	cvSetReal1D( var_type, var_count, CV_VAR_CATEGORICAL );//CV_VAR_CATEGORICAL CV_VAR_NUMERICAL
	cvSetReal1D( var_type, var_count+1, CV_VAR_CATEGORICAL ); //CV_VAR_CATEGORICAL

	// train classifier
	//printf( "training the classifier (may take a few minutes)...");
	boost.train( new_data, CV_ROW_SAMPLE, new_responses, 0, 0, var_type, 0, this->boost_parameters_);

	cvReleaseMat( &new_data );
	cvReleaseMat( &new_responses );
	//printf("\n");

	temp_sample = cvCreateMat( 1, var_count + 1, CV_32F );
	weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F );

	// compute prediction error on train and test data
	for( i = 0; i < nsamples_all; i++ )
	{
		int best_class = 0;
		double max_sum = -DBL_MAX;
		double r;
		CvMat sample;
		cvGetRow( data, &sample, i );

		for( k = 0; k < var_count; k++ )
			temp_sample->data.fl[k] = sample.data.fl[k];

		for( j = 0; j < this->number_of_classes_; j++ )
		{
			temp_sample->data.fl[var_count] = (float)j;

			boost.predict( temp_sample, 0, weak_responses );
			double sum = cvSum( weak_responses ).val[0];

			if( max_sum < sum )
			{
				max_sum = sum;
				best_class = j + FIRST_LABEL;
			}
		}

		r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0;

		if( i < ntrain_samples )
			train_hr += r;
		else
			test_hr += r;
	}

	train_hr /= (double)ntrain_samples;
	test_hr /= ((double)nsamples_all - (double)ntrain_samples);

	cout << "Recognition rate: train = " << train_hr * 100 << ", test = " << test_hr * 100 << endl;

	// fill result-parameters
	train_error = 1 - train_hr;
	test_error = 1 - test_hr;

	// Save classifier to file if needed
	if( model_filename )
		boost.save( model_filename );

	boost.clear();
	cvReleaseMat( &temp_sample );
	cvReleaseMat( &weak_responses );
	cvReleaseMat( &var_type );
	cvReleaseMat( &data );
	cvReleaseMat( &responses );

	return 0;
}
示例#2
0
/**
 * @brief Evaluates a previously saved boosted classifier on a sample file.
 *
 * The samples are loaded with read_num_class_data() and the model is loaded
 * from model_filename. For every sample, each candidate class index is
 * appended to the feature vector in turn; the class whose candidate row
 * yields the largest sum of weak responses is taken as the prediction and
 * compared with the sample's label.
 *
 * @param sample_filename  File to read the labelled test samples from.
 * @param model_filename   File the trained classifier is loaded from.
 * @param test_error       Out: 1 - hit rate over all samples in the file.
 * @return 0 on success, -1 if the samples or the classifier could not be
 *         read or the file holds no samples. test_error is left untouched on
 *         failure.
 */
int AdaBoost::test(const char* sample_filename, const char* model_filename, double &test_error)
{
	CvMat* data = 0;
	CvMat* responses = 0;

	const int ok = read_num_class_data( sample_filename, this->number_of_features_, &data, &responses );

	if( !ok || !data || !responses )
	{
		printf( "Could not read the test-file %s\n", sample_filename );
		// cvReleaseMat is a no-op on a null matrix, so this is safe whether or
		// not the reader allocated anything before failing.
		cvReleaseMat( &data );
		cvReleaseMat( &responses );
		return -1;
	}

	printf( "The test-file %s is loaded.\n", sample_filename );

	const int nsamples_all = data->rows;
	const int var_count = data->cols;

	cout << "no. of test samples: " << nsamples_all << std::endl;
	cout << "no. of features: " <<  var_count << std::endl;
	cout << "no. of classifiers: " <<  this->number_of_classes_ << std::endl;

	if( nsamples_all <= 0 )
	{
		// An empty file would make the hit rate 0/0 further down.
		printf( "The test-file %s contains no samples\n", sample_filename );
		cvReleaseMat( &data );
		cvReleaseMat( &responses );
		return -1;
	}

	// load classifier from the specified file
	CvBoost boost;

	if( model_filename )
		boost.load( model_filename );

	// A model without weak predictors is treated as "not loaded": there is
	// nothing to evaluate and the response buffer could not be allocated.
	const int weak_count = boost.get_weak_predictors() ? boost.get_weak_predictors()->total : 0;

	if( weak_count <= 0 )
	{
		printf( "Could not read the classifier %s\n", model_filename ? model_filename : "(null)" );
		boost.clear();
		// These were leaked on this path before.
		cvReleaseMat( &data );
		cvReleaseMat( &responses );
		return -1;
	}

	// Feature vector plus one trailing slot for the candidate class index.
	CvMat* temp_sample = cvCreateMat( 1, var_count + 1, CV_32F );
	CvMat* weak_responses = cvCreateMat( 1, weak_count, CV_32F );

	// compute prediction error on test data
	double hits = 0;

	for( int i = 0; i < nsamples_all; i++ )
	{
		CvMat sample;
		cvGetRow( data, &sample, i );

		for( int k = 0; k < var_count; k++ )
			temp_sample->data.fl[k] = sample.data.fl[k];

		// Try every candidate class and keep the one with the largest sum of
		// weak responses.
		int best_class = 0;
		double max_sum = -DBL_MAX;

		for( int j = 0; j < this->number_of_classes_; j++ )
		{
			temp_sample->data.fl[var_count] = static_cast<float>(j);

			boost.predict( temp_sample, 0, weak_responses );
			const double sum = cvSum( weak_responses ).val[0];

			if( max_sum < sum )
			{
				max_sum = sum;
				best_class = j + FIRST_LABEL;
			}
		}

		hits += fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0;
	}

	const double test_hr = hits / static_cast<double>(nsamples_all);

	test_error = 1 - test_hr;

	boost.clear();
	cvReleaseMat( &temp_sample );
	cvReleaseMat( &weak_responses );
	cvReleaseMat( &data );
	cvReleaseMat( &responses );

	return 0;
}