void svm_train(const FeatureCollection& features,const ICDAR2011DataSet& dataset ,vector<int>& false_idx) { size_t feature_count = features.size(); int validation_feature_count = 5000; size_t dim = features[0].featureArray.size(); Mat train_data(feature_count - validation_feature_count, dim, CV_32F); Mat train_label(feature_count - validation_feature_count, 1, CV_32F); for(int feature_idx = 0; feature_idx < feature_count - validation_feature_count; feature_idx++) { float sum_norm = 0; if (NORM) sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0); else { for (int i = 0;i< dim;i++) sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i]; sum_norm = sqrt(sum_norm); } for(int dim_idx = 0; dim_idx < dim; dim_idx++) { FeatureAtPoint fea = features[feature_idx]; train_data.at<float>(feature_idx, dim_idx) = features[feature_idx].featureArray[dim_idx]/sum_norm; } train_label.at<float>(feature_idx) = features[feature_idx].label; } std::cout<<"svm train-------------------------------------"<<std::endl; CvSVM SVM; CvBoost boost; CvBoostParams params = CvBoostParams(CvBoost::REAL, 50, 0.95, 5, false, 0 ); boost.train(train_data, CV_ROW_SAMPLE,train_label, Mat(), Mat(), Mat(),Mat(),params); std::cout<<"svm validad-----------------------------------"<<std::endl; // Calculate trainning error cout<<feature_count<<endl; Mat test_data(1, dim, CV_32F); float predict_correct = 0, positive_cnt = 0, negative_cnt = 0; for(int feature_idx = feature_count - validation_feature_count; feature_idx < feature_count; feature_idx++) { float sum_norm = 0 ; if (NORM) sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0); else { for (int i = 0;i< dim;i++) sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i]; sum_norm = sqrt(sum_norm); } for(int dim_idx = 0; dim_idx < dim; dim_idx++) { test_data.at<float>(0, dim_idx) 
= features[feature_idx].featureArray[dim_idx]/sum_norm; } if(features[feature_idx].label == boost.predict(test_data)) { predict_correct++; } else { false_idx.push_back(feature_idx); } positive_cnt += features[feature_idx].label == 1 ? 1 : 0; negative_cnt += features[feature_idx].label == 0 ? 1 : 0; } cout << "Training accuracy:" << predict_correct / 5000 << " pos_cnt:" << positive_cnt << " neg_cnt:" << negative_cnt << endl; boost.save(((dataset.model_dir + "boost.model").c_str())); }
int main(int argc, char** argv) { //Read the data from csv file CvMLData cvml; cvml.read_csv("char_datasetNM2.csv"); //Indicate which column is the response cvml.set_response_idx(0); //Select 50% for the training CvTrainTestSplit cvtts(0.8f, true); //Assign the division to the data cvml.set_train_test_split(&cvtts); CvBoost boost; ifstream ifile("./trained_classifierNM2.xml"); if (ifile) { //The file exists, so we don't want to train printf("Found trained_boost_char.xml file, remove it if you want to retrain with new data ... \n"); boost.load("./trained_classifierNM2.xml", "boost"); } else { //Train with 100 features printf("Training ... \n"); boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false); } //Calculate the test and train errors std::vector<float> train_responses, test_responses; float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); //Try a char static const float arr[] = {0,0.870690,0.096485,2.000000,2.000000,0.137080,1.269940,2.000000}; vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false ); float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true ); printf("\n The char sample is predicted as: %f (with number of votes = %f)\n", prediction,votes); printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n", (float)1-(float)1/(1+exp(-2*votes))); //Try a NONchar //static const float arr2[] = {0,1.500000,0.072162,0.000000,8.000000,0.188095,1.578947,16.000000}; static const float arr2[] = {0,0.565217,0.103749,1.000000,2.000000,0.032258,1.525692,10.000000}; vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false ); votes = boost.predict( Mat(sample2), Mat(), Range::all(), 
false, true ); printf("\n The non_char sample is predicted as: %f (with number of votes = %f)\n", prediction,votes); printf(" Class probability (using Logistic Correction) is P(r|character) = %f\n\n", (float)1-(float)1/(1+exp(-2*votes))); // Save the trained classifier boost.save("./trained_classifierNM2.xml", "boost"); return EXIT_SUCCESS; }
int main() { const int train_sample_count = 300; //#define LEPIOTA #ifdef LEPIOTA const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data"; #else const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data"; #endif CvDTree dtree; CvBoost boost; CvRTrees rtrees; CvERTrees ertrees; CvMLData data; CvTrainTestSplit spl( train_sample_count ); data.read_csv( filename ); #ifdef LEPIOTA data.set_response_idx( 0 ); #else data.set_response_idx( 21 ); data.change_var_type( 21, CV_VAR_CATEGORICAL ); #endif data.set_train_test_split( &spl ); printf("======DTREE=====\n"); dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 )); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() ); #ifdef LEPIOTA printf("======BOOST=====\n"); boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 ); #endif printf("======RTREES=====\n"); rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() ); return 0; }
int SvmTest( const ICDAR2011DataSet& dataset, FeatureCollection& features , const vector<int>&ProposalLen, const vector< vector<Rect> >&Proposal) { long sumProposal = accumulate( ProposalLen.begin(), ProposalLen.end(), 0 ); assert( sumProposal == features.size() ); CvBoost boost; boost.load((dataset.model_dir + "boost.model").c_str()); for( size_t image_idx = 0; image_idx < ProposalLen.size() ;image_idx ++ ) { string filename = dataset.test_set[ image_idx ]; string resultPath = dataset.result_dir + CmFile::GetFileNameWithoutExtension( filename ) + ".txt"; cout<<"result save path "<<resultPath<<" "<<image_idx<<" of "<<dataset.test_num<<endl; ofstream score_out( resultPath.c_str() ); int num = ProposalLen[ image_idx ]; vector<Rect>proposal = Proposal[ image_idx ]; assert( num == proposal.size() ); long StartIndex = 0; for (int i = 0;i < num ;i++) { FeatureAtPoint featurePoint = features[ i + StartIndex ]; int dim = featurePoint.featureArray.size(); Mat test_data(1, dim, CV_32F); double sum_norm = 0; if (NORM) { sum_norm = accumulate(featurePoint.featureArray.begin(),featurePoint.featureArray.end(),0.0); } else { for (int i = 0;i< dim;i++) sum_norm += featurePoint.featureArray[i] * featurePoint.featureArray[i]; sum_norm = sqrt(sum_norm); } vector<float>sample; for (int dim_idx = 0;dim_idx < dim;dim_idx++) { test_data.at<float>(0, dim_idx) = featurePoint.featureArray[dim_idx]/ sum_norm; sample.push_back(featurePoint.featureArray[dim_idx]/sum_norm); } double score = boost.predict(test_data,Mat(),Range::all(),false,true); // score = score * (-1); score_out<< proposal[i].x << " " << proposal[i].y << " " << proposal[i].width << " " << proposal[i].height << " " << score << endl; } StartIndex += num; score_out.close(); } }
//目前只能进行两分类的识别,否则会抛出异常 void Model::Predict_boosting( const SampleSet& samples, SampleSet& outError ) { int true_resp = 0; CvBoost *model = (CvBoost*)m_pModel; for (int i = 0; i < samples.N(); i++) { float ret = model->predict(samples.GetSampleAt(i), cv::Mat(), cv::Range::all()); if (ret != samples.GetLabelAt(i)) { outError.Add(samples.GetSampleAt(i), samples.GetLabelAt(i)); } else { true_resp++; } } printf("%d %d",samples.N(), true_resp); }
int main(int argc, char** argv) { generateData(); /* STEP 2. Opening the file */ //1. Declare a structure to keep the data CvMLData cvml; //2. Read the file cvml.read_csv("samples.csv"); //3. Indicate which column is the response cvml.set_response_idx(0); /* STEP 3. Splitting the samples */ //1. Select 40 for the training CvTrainTestSplit cvtts(15, true); //2. Assign the division to the data cvml.set_train_test_split(&cvtts); printf("Training ... "); /* STEP 4. The training */ //1. Declare the classifier CvBoost boost; //2. Train it with 100 features boost.train(&cvml, CvBoostParams(CvBoost::REAL, 100, 0, 1, false, 0), false); /* STEP 5. Calculating the testing and training error */ // 1. Declare a couple of vectors to save the predictions of each sample vector<float> train_responses; vector<float> test_responses; // 2. Calculate the training error float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); // 3. Calculate the test error float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); /* STEP 6. Save your classifier */ // Save the trained classifier boost.save("./trained_boost.xml", "boost"); return EXIT_SUCCESS; }
void find_decision_boundary_BT() { img.copyTo( imgDst ); Mat trainSamples, trainClasses; prepare_train_data( trainSamples, trainClasses ); // learn classifier CvBoost boost; Mat var_types( 1, trainSamples.cols + 1, CV_8UC1, Scalar(CV_VAR_ORDERED) ); var_types.at<uchar>( trainSamples.cols ) = CV_VAR_CATEGORICAL; CvBoostParams params( CvBoost::DISCRETE, // boost_type 100, // weak_count 0.95, // weight_trim_rate 2, // max_depth false, //use_surrogates 0 // priors ); boost.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), var_types, Mat(), params ); Mat testSample(1, 2, CV_32FC1 ); for( int y = 0; y < img.rows; y += testStep ) { for( int x = 0; x < img.cols; x += testStep ) { testSample.at<float>(0) = (float)x; testSample.at<float>(1) = (float)y; int response = (int)boost.predict( testSample ); circle( imgDst, Point(x,y), 2, classColors[response], 1 ); } } }
int main(int argc, char** argv) { /* STEP 2. Opening the file */ //1. Declare a structure to keep the data CvMLData cvml; //2. Read the file cvml.read_csv("groups_dataset.csv"); //cvml.read_csv("strokes_dataset_noresized.csv"); //3. Indicate which column is the response cvml.set_response_idx(0); /* STEP 3. Splitting the samples */ //1. Select 50% for the training (an integer value is also allowed here) CvTrainTestSplit cvtts(0.9f, true); //2. Assign the division to the data cvml.set_train_test_split(&cvtts); /* STEP 4. The training */ //1. Declare the classifier CvBoost boost; ifstream ifile("./trained_boost_groups.xml"); if (ifile) { // The file exists, so we don't need to train boost.load("./trained_boost_groups.xml", "boost"); } else { //2. Train it with 100 features printf("Training ... \n"); boost.train(&cvml, CvBoostParams(CvBoost::REAL, 500, 0, 1, false, 0), false); } /* STEP 5. Calculating the testing and training error */ // 1. Declare a couple of vectors to save the predictions of each sample std::vector<float> train_responses, test_responses; // 2. Calculate the training error float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); // 3. 
Calculate the test error float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); static const float arr[] = {0,-1.980394,1.249858,-0.631116,2.819193,0.305448,0.108346,0.801116,0.104873,0.130908,0.559806,0.255053,0.455610,0.294118,0.455645,1.549193,0.087770,0.144896,1.650866}; vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false ); float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true ); printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction,votes); //static const float arr2[] = {0,0.911369,1.052156,1.154478,3.321924,0.829768,0.249785,0.616930,0.246637,0.399782,0.337159,0.103893,0.308142,0.666667,0.745356,1.118034,0.009747,0.011016,1.130162}; static const float arr2[] = {0,1.14335,3.00412,2.62747,3.26428,2.32749,0.713018,0.47244,0.289846,0.613508,0.40514,0.216716,0.53305,0.878788,3.21698,3.6607,0.0422318,0.114392,2.70868}; vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); float prediction2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, false ); float votes2 = boost.predict( Mat(sample2), Mat(), Range::all(), false, true ); printf("\n The group sample is predicted as: %f (with number of votes = %f)\n", prediction2,votes2); /* STEP 6. Save your classifier */ // Save the trained classifier boost.save("./trained_boost_groups.xml", "boost"); return EXIT_SUCCESS; }
static int build_boost_classifier( char* data_filename, char* filename_to_save, char* filename_to_load ) { const int class_count = 26; CvMat* data = 0; CvMat* responses = 0; CvMat* var_type = 0; CvMat* temp_sample = 0; CvMat* weak_responses = 0; int ok = read_num_class_data( data_filename, 16, &data, &responses ); int nsamples_all = 0, ntrain_samples = 0; int var_count; int i, j, k; double train_hr = 0, test_hr = 0; CvBoost boost; if( !ok ) { printf( "Could not read the database %s\n", data_filename ); return -1; } printf( "The database %s is loaded.\n", data_filename ); nsamples_all = data->rows; ntrain_samples = (int)(nsamples_all*0.5); var_count = data->cols; // Create or load Boosted Tree classifier if( filename_to_load ) { // load classifier from the specified file boost.load( filename_to_load ); ntrain_samples = 0; if( !boost.get_weak_predictors() ) { printf( "Could not read the classifier %s\n", filename_to_load ); return -1; } printf( "The classifier %s is loaded.\n", data_filename ); } else { // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // // As currently boosted tree classifier in MLL can only be trained // for 2-class problems, we transform the training database by // "unrolling" each training sample as many times as the number of // classes (26) that we have. // // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! CvMat* new_data = cvCreateMat( ntrain_samples*class_count, var_count + 1, CV_32F ); CvMat* new_responses = cvCreateMat( ntrain_samples*class_count, 1, CV_32S ); // 1. 
unroll the database type mask printf( "Unrolling the database...\n"); for( i = 0; i < ntrain_samples; i++ ) { float* data_row = (float*)(data->data.ptr + data->step*i); for( j = 0; j < class_count; j++ ) { float* new_data_row = (float*)(new_data->data.ptr + new_data->step*(i*class_count+j)); for( k = 0; k < var_count; k++ ) new_data_row[k] = data_row[k]; new_data_row[var_count] = (float)j; new_responses->data.i[i*class_count + j] = responses->data.fl[i] == j+'A'; } } // 2. create type mask var_type = cvCreateMat( var_count + 2, 1, CV_8U ); cvSet( var_type, cvScalarAll(CV_VAR_ORDERED) ); // the last indicator variable, as well // as the new (binary) response are categorical cvSetReal1D( var_type, var_count, CV_VAR_CATEGORICAL ); cvSetReal1D( var_type, var_count+1, CV_VAR_CATEGORICAL ); // 3. train classifier printf( "Training the classifier (may take a few minutes)...\n"); boost.train( new_data, CV_ROW_SAMPLE, new_responses, 0, 0, var_type, 0, CvBoostParams(CvBoost::REAL, 100, 0.95, 5, false, 0 )); cvReleaseMat( &new_data ); cvReleaseMat( &new_responses ); printf("\n"); } temp_sample = cvCreateMat( 1, var_count + 1, CV_32F ); weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F ); // compute prediction error on train and test data for( i = 0; i < nsamples_all; i++ ) { int best_class = 0; double max_sum = -DBL_MAX; double r; CvMat sample; cvGetRow( data, &sample, i ); for( k = 0; k < var_count; k++ ) temp_sample->data.fl[k] = sample.data.fl[k]; for( j = 0; j < class_count; j++ ) { temp_sample->data.fl[var_count] = (float)j; boost.predict( temp_sample, 0, weak_responses ); double sum = cvSum( weak_responses ).val[0]; if( max_sum < sum ) { max_sum = sum; best_class = j + 'A'; } } r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 
1 : 0; if( i < ntrain_samples ) train_hr += r; else test_hr += r; } test_hr /= (double)(nsamples_all-ntrain_samples); train_hr /= (double)ntrain_samples; printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n", train_hr*100., test_hr*100. ); printf( "Number of trees: %d\n", boost.get_weak_predictors()->total ); // Save classifier to file if needed if( filename_to_save ) boost.save( filename_to_save ); cvReleaseMat( &temp_sample ); cvReleaseMat( &weak_responses ); cvReleaseMat( &var_type ); cvReleaseMat( &data ); cvReleaseMat( &responses ); return 0; }
// Loads `partFilter` from the file `filename`, passing "hog" as the second
// argument of its load() call.
// NOTE(review): "hog" is presumably the name of the stored model node;
// confirm against the type of partFilter.
void setPartFilter(const char* filename)
{
    const char* const partFilterName = "hog";
    partFilter.load(filename, partFilterName);
}
int main(int argc, char** argv) { cv::FileStorage fs(argv[1], cv::FileStorage::READ); CvBoost classifier; classifier.read(*fs, *fs["classifier"]); // Fetch features H5File h5f = open_feature_file(argv[2]); vector<string> names = feature_names(h5f); int num_features = names.size(); // Find image size Size imsize; read_feature_size(h5f, imsize, names[0].c_str()); // figure out how many chunks to break into int row_block_size = imsize.height / (imsize.height / 1024) + 1; int col_block_size = imsize.width / (imsize.width / 1024) + 1; // Output image Mat prediction(imsize, CV_32FC1); Rect fullrect(0, 0, imsize.width, imsize.height); for (int basecol = 0; basecol < imsize.width; basecol += col_block_size) { for (int baserow = 0; baserow < imsize.height; baserow += row_block_size) { cout << basecol << " " << baserow << endl; Rect roi(basecol, baserow, col_block_size, row_block_size); roi &= fullrect; // Stack columns Mat stacked_features(roi.width * roi.height, num_features, CV_32F); for (int fnum = 0; fnum < num_features; fnum++) { Mat feature; Mat dest; read_feature(h5f, feature, names[fnum].c_str(), roi); feature.reshape(0, roi.width * roi.height).copyTo(stacked_features.col(fnum)); } Mat submat = prediction(roi); int stacked_row_offset = 0; for (int outrow = 0; outrow < roi.height; outrow++) { float *dest = submat.ptr<float>(outrow); for (int outcol = 0; outcol < roi.width; outcol++, stacked_row_offset++, dest++) { float sum = classifier.predict(stacked_features.row(stacked_row_offset), Mat(), Range::all(), false, true); // cout << sum << " " << 1 / (1 + exp(-sum)) << endl; *dest = 1 / (1 + exp(-sum)); } } } } if (argc == 3) { normalize(prediction, prediction, 0, 1, NORM_MINMAX); imshow("result", prediction); waitKey(0); } else { H5File h5fout = create_feature_file(argv[3], prediction); write_feature(h5fout, prediction, "probabilities"); for (int fnum = 0; fnum < num_features; fnum++) { if (names[fnum].find("membrane") != string::npos) { Mat feature; read_feature(h5f, 
feature, names[fnum].c_str()); write_feature(h5fout, feature, names[fnum].c_str()); } } } }
int AdaBoost::train(const char* samples_filename, const char* model_filename, const double ratio, double &train_error, double &test_error) { CvMat* data = 0; CvMat* responses = 0; CvMat* var_type = 0; CvMat* temp_sample = 0; CvMat* weak_responses = 0; int ok = read_num_class_data( samples_filename, this->number_of_features_, &data, &responses ); int nsamples_all = 0, ntrain_samples = 0; int var_count = 0; int i=0, j=0, k=0; double train_hr = 0, test_hr = 0; CvBoost boost; if( !ok ) { cout << "Could not read the sample in" << samples_filename << endl;; return -1; } cout << "The sample file " << samples_filename << " is loaded." << endl; nsamples_all = data->rows; ntrain_samples = (int)(nsamples_all * ratio); var_count = data->cols; // create classifier by using <data> and <responses> cout << "Training the classifier ..." << endl; // create classifiers CvMat* new_data = cvCreateMat(ntrain_samples * this->number_of_classes_, var_count + 1 , CV_32F );//+1 CvMat* new_responses = cvCreateMat( ntrain_samples * this->number_of_classes_, 1, CV_32S ); // unroll the database type mask printf( "Unrolling the samples ...\n"); for( i = 0; i < ntrain_samples; i++ ) { float* data_row = (float*)(data->data.ptr + data->step*i); for( j = 0; j < this->number_of_classes_; j++ ) { float* new_data_row = (float*)(new_data->data.ptr + new_data->step*(i * this->number_of_classes_ + j)); for( k = 0; k < var_count; k++ ) new_data_row[k] = data_row[k]; new_data_row[var_count] = (float)j; new_responses->data.i[i * this->number_of_classes_ + j] = responses->data.fl[i] == j + FIRST_LABEL; } } // create type mask var_type = cvCreateMat( var_count + 2, 1, CV_8U ); cvSet( var_type, cvScalarAll(CV_VAR_ORDERED)); // the last indicator variable, as well // as the new (binary) response are categorical cvSetReal1D( var_type, var_count, CV_VAR_CATEGORICAL );//CV_VAR_CATEGORICAL CV_VAR_NUMERICAL cvSetReal1D( var_type, var_count+1, CV_VAR_CATEGORICAL ); //CV_VAR_CATEGORICAL // train classifier //printf( 
"training the classifier (may take a few minutes)..."); boost.train( new_data, CV_ROW_SAMPLE, new_responses, 0, 0, var_type, 0, this->boost_parameters_); cvReleaseMat( &new_data ); cvReleaseMat( &new_responses ); //printf("\n"); temp_sample = cvCreateMat( 1, var_count + 1, CV_32F ); weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F ); // compute prediction error on train and test data for( i = 0; i < nsamples_all; i++ ) { int best_class = 0; double max_sum = -DBL_MAX; double r; CvMat sample; cvGetRow( data, &sample, i ); for( k = 0; k < var_count; k++ ) temp_sample->data.fl[k] = sample.data.fl[k]; for( j = 0; j < this->number_of_classes_; j++ ) { temp_sample->data.fl[var_count] = (float)j; boost.predict( temp_sample, 0, weak_responses ); double sum = cvSum( weak_responses ).val[0]; if( max_sum < sum ) { max_sum = sum; best_class = j + FIRST_LABEL; } } r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0; if( i < ntrain_samples ) train_hr += r; else test_hr += r; } train_hr /= (double)ntrain_samples; test_hr /= ((double)nsamples_all - (double)ntrain_samples); cout << "Recognition rate: train = " << train_hr * 100 << ", test = " << test_hr * 100 << endl; // fill result-parameters train_error = 1 - train_hr; test_error = 1 - test_hr; // Save classifier to file if needed if( model_filename ) boost.save( model_filename ); boost.clear(); cvReleaseMat( &temp_sample ); cvReleaseMat( &weak_responses ); cvReleaseMat( &var_type ); cvReleaseMat( &data ); cvReleaseMat( &responses ); return 0; }
int AdaBoost::test(const char* sample_filename, const char* model_filename, double &test_error) { CvMat* data = 0; CvMat* responses = 0; CvMat* var_type = 0; CvMat* temp_sample = 0; CvMat* weak_responses = 0; int ok = 0; int nsamples_all = 0; int var_count; int i, j, k; double test_hr = 0; CvBoost boost; ok = read_num_class_data( sample_filename, this->number_of_features_, &data, &responses ); if( !ok ) { printf( "Could not read the test-file %s\n", sample_filename ); return -1; } printf( "The test-file %s is loaded.\n", sample_filename ); nsamples_all = data->rows; var_count = data->cols; cout << "no. of test samples: " << nsamples_all << std::endl; cout << "no. of features: " << var_count << std::endl; cout << "no. of classifiers: " << this->number_of_classes_ << std::endl; // load classifier from the specified file boost.load( model_filename ); if( !boost.get_weak_predictors() ) { printf( "Could not read the classifier %s\n", model_filename ); return -1; } //printf( "The classifier %s is loaded.\n", filename_to_load ); temp_sample = cvCreateMat( 1, var_count + 1, CV_32F ); weak_responses = cvCreateMat( 1, boost.get_weak_predictors()->total, CV_32F ); // compute prediction error on test data for( i = 0; i < nsamples_all; i++ ) { int best_class = 0; double max_sum = -DBL_MAX; double r; CvMat sample; cvGetRow( data, &sample, i ); for( k = 0; k < var_count; k++ ) temp_sample->data.fl[k] = sample.data.fl[k]; for( j = 0; j < this->number_of_classes_; j++ ) { temp_sample->data.fl[var_count] = (float)j; boost.predict( temp_sample, 0, weak_responses ); double sum = cvSum( weak_responses ).val[0]; if( max_sum < sum ) { max_sum = sum; best_class = j + FIRST_LABEL; } } r = fabs(best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0; test_hr += r; } test_hr /= (double) nsamples_all; test_error = 1 - test_hr; boost.clear(); cvReleaseMat( &temp_sample ); cvReleaseMat( &weak_responses ); cvReleaseMat( &var_type ); cvReleaseMat( &data ); cvReleaseMat( &responses ); return 0; }
int main() { const int train_sample_count = 300; bool is_regression = false; const char* filename = "data/waveform.data"; int response_idx = 21; CvMLData data; CvTrainTestSplit spl( train_sample_count ); if(data.read_csv(filename) != 0) { printf("couldn't read %s\n", filename); exit(0); } data.set_response_idx(response_idx); data.change_var_type(response_idx, CV_VAR_CATEGORICAL); data.set_train_test_split( &spl ); const CvMat* values = data.get_values(); const CvMat* response = data.get_responses(); const CvMat* missing = data.get_missing(); const CvMat* var_types = data.get_var_types(); const CvMat* train_sidx = data.get_train_sample_idx(); const CvMat* var_idx = data.get_var_idx(); CvMat*response_map; CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL); int num_classes = response_map->cols; CvDTree dtree; printf("======DTREE=====\n"); CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0); dtree.train( &data, cvd_params); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() ); #if 0 /* boosted trees are only implemented for two classes */ printf("======BOOST=====\n"); CvBoost boost; boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 ); #endif printf("======RTREES=====\n"); CvRTrees rtrees; rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); CvERTrees ertrees; ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() ); 
printf("======GBTREES=====\n"); CvGBTrees gbtrees; CvGBTreesParams gbparams; gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression gbtrees.train( &data, gbparams); //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx); print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0); printf("======KNEAREST=====\n"); CvKNearest knearest; //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses, // const Mat& _sample_idx, bool _is_regression, // int _max_k, bool _update_base ) bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL; assert(is_classifier); int max_k = 10; knearest.train(values, response, train_sidx, is_regression, max_k, false); CvMat* new_response = cvCreateMat(response->rows, 1, values->type); //print_types(); //const CvMat* train_sidx = data.get_train_sample_idx(); knearest.find_nearest(values, max_k, new_response, 0, 0, 0); print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR), knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0); printf("======== RBF SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm1; CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm1.train(values, response, train_sidx, var_idx, params1); svm1.train_auto(values, response, var_idx, train_sidx, params1); svm_print_error(&svm1, values, response, response_idx, train_sidx); printf("======== Linear SVM =======\n"); CvMySVM svm2; CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 
1000, FLT_EPSILON)); //svm2.train(values, response, train_sidx, var_idx, params2); svm2.train_auto(values, response, var_idx, train_sidx, params2); svm_print_error(&svm2, values, response, response_idx, train_sidx); printf("======NEURONAL NETWORK=====\n"); int num_layers = 3; CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1)); cvmSetI(&layers, 0, 0, values->cols-1); cvmSetI(&layers, 0, 1, num_classes); cvmSetI(&layers, 0, 2, num_classes); CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0); CvANN_MLP_TrainParams ann_params; //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP; CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes); CvMat values2 = cvmat_remove_column(values, response_idx); ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000); //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000); ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx); #if 0 /* slow */ printf("======== Polygonal SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm3; CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY, /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm3.train(values, response, train_sidx, var_idx, params3); svm3.train_auto(values, response, var_idx, train_sidx, params3); svm_print_error(&svm3, values, response, response_idx, train_sidx); #endif return 0; }
int main(int argc, char** argv) { /* STEP 2. Opening the file */ //1. Declare a structure to keep the data CvMLData cvml; //2. Read the file cvml.read_csv("char_dataset.csv"); //cvml.read_csv("strokes_dataset_noresized.csv"); //3. Indicate which column is the response cvml.set_response_idx(0); /* STEP 3. Splitting the samples */ //1. Select 50% for the training (an integer value is also allowed here) CvTrainTestSplit cvtts(0.9f, true); //2. Assign the division to the data cvml.set_train_test_split(&cvtts); /* STEP 4. The training */ //1. Declare the classifier CvBoost boost; ifstream ifile("./trained_boost_char.xml"); if (ifile) { // The file exists, so we don't need to train boost.load("./trained_boost_char.xml", "boost"); } else { //2. Train it with 100 features printf("Training ... \n"); boost.train(&cvml, CvBoostParams(CvBoost::REAL, 2, 0, 1, false, 0), false); } cout<<"after train"<<endl; /* STEP 5. Calculating the testing and training error */ // 1. Declare a couple of vectors to save the predictions of each sample std::vector<float> train_responses, test_responses; // 2. Calculate the training error float fl1 = boost.calc_error(&cvml,CV_TRAIN_ERROR,&train_responses); // 3. 
Calculate the test error float fl2 = boost.calc_error(&cvml,CV_TEST_ERROR,&test_responses); printf("Error train %f \n", fl1); printf("Error test %f \n", fl2); //Try a char static const float arr[] = {0,1.659899,0.684169,0.412175,150.000000,81.000000,0.540000,0.358025,0.151203,0.000000,0.000000}; vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float prediction = boost.predict( Mat(sample), Mat(), Range::all(), false, false ); float votes = boost.predict( Mat(sample), Mat(), Range::all(), false, true ); printf("\n The sample (360) is predicted as: %f (with number of votes = %f)\n", prediction,votes); //Try a NONchar static const float arr2[] = {0,1.250000,0.433013,0.346410,9.000000,8.000000,0.888889,0.833333,0.375000,0.000000,0.000000}; vector<float> sample2 (arr2, arr2 + sizeof(arr2) / sizeof(arr2[0]) ); prediction = boost.predict( Mat(sample2), Mat(), Range::all(), false, false ); votes = boost.predict( Mat(sample2), Mat(), Range::all(), false, true ); printf("\n The sample (367) is predicted as: %f (with number of votes = %f)\n", prediction,votes); /* STEP 6. Save your classifier */ // Save the trained classifier boost.save("./trained_boost_char.xml", "boost"); return EXIT_SUCCESS; }