void Model::Train_gbt( const SampleSet& samples ) { CvGBTrees* model = (CvGBTrees*)m_pModel; CvGBTreesParams* para = (CvGBTreesParams*)m_trainPara; model->train(samples.Samples(), CV_ROW_SAMPLE, samples.Labels(), cv::Mat(), cv::Mat(), cv::Mat(), cv::Mat(), *para); }
// Save/load round-trip check for the trained model in `gtb`.
//
// Sequence:
//   1. save the model to a first temporary file and record test responses
//      in test_resps1;
//   2. reload the model from that file and record test responses again in
//      test_resps2;
//   3. save the reloaded model to a second temporary file;
//   4. hand over to checkLoadSave() and return its verdict.
//
// Returns cvtest::TS::FAIL_GENERIC when no model has been trained yet.
int CV_GBTreesTest::TestSaveLoad()
{
    if (!gtb)
        return cvtest::TS::FAIL_GENERIC;

    // Both names are generated up front, in this order.
    model_file_name1 = cv::tempfile();
    model_file_name2 = cv::tempfile();

    const char* const first_path = model_file_name1.c_str();

    // calc_error()'s return value is discarded; the call is made for the
    // per-sample responses it writes through the third argument.
    gtb->save(first_path);
    gtb->calc_error(data, CV_TEST_ERROR, &test_resps1);

    gtb->load(first_path);
    gtb->calc_error(data, CV_TEST_ERROR, &test_resps2);

    gtb->save(model_file_name2.c_str());

    const int verdict = checkLoadSave();
    return verdict;
}
void Model::Predict_gbt( const SampleSet& samples, SampleSet& outError ) { int true_resp = 0; CvGBTrees *model = (CvGBTrees*)m_pModel; for (int i = 0; i < samples.N(); i++) { float ret; ret = model->predict(samples.GetSampleAt(i), cv::Mat(), cv::Range::all()); if (ret != samples.GetLabelAt(i)) { outError.Add(samples.GetSampleAt(i), samples.GetLabelAt(i)); } else { true_resp++; } } printf("%d %d",samples.N(), true_resp); }
// Save/load round-trip check for the trained model in `gtb`.
//
// Two temporary file names are generated with tmpnam(); the model is saved
// to the first, reloaded from it, and saved again to the second. Test
// responses are captured before and after the reload (test_resps1 /
// test_resps2) and checkLoadSave() produces the final verdict.
//
// Returns CvTS::FAIL_GENERIC when no model has been trained yet.
//
// NOTE(review): tmpnam() is race-prone; replacing it would require changing
// the type of the model_file_name members, which are declared elsewhere.
int CV_GBTreesTest::TestSaveLoad()
{
    if (!gtb)
        return CvTS::FAIL_GENERIC;

    char* const file_names[] = { model_file_name1, model_file_name2 };
    const int name_count = 2;

    for (int k = 0; k < name_count; ++k)
        tmpnam(file_names[k]);

    // A leading backslash in a generated name is replaced by '_'
    // (presumably so the name is not treated as root-relative — confirm).
    for (int k = 0; k < name_count; ++k)
    {
        if (file_names[k][0] == '\\')
            file_names[k][0] = '_';
    }

    // calc_error()'s return value is discarded; the call is made for the
    // per-sample responses it writes through the third argument.
    gtb->save(model_file_name1);
    gtb->calc_error(data, CV_TEST_ERROR, &test_resps1);

    gtb->load(model_file_name1);
    gtb->calc_error(data, CV_TEST_ERROR, &test_resps2);

    gtb->save(model_file_name2);

    const int verdict = checkLoadSave();
    return verdict;
}
// Computes predictions for the half-open index range [range.begin(),
// range.end()).
//
// For each position `pos` in the range the sample row is either
// idx->data.i[pos] (when an index matrix is present) or `pos` itself. The
// row is extracted from `samples` (and from `missing`, when a missing-data
// mask is present) and passed to gbt->predict_serial(); the result is
// stored in predictions[pos].
virtual void operator()(const cv::BlockedRange& range) const
{
    const int first = range.begin();
    const int last = range.end();

    CvMat sampleRow;
    CvMat missingRow;

    for (int pos = first; pos < last; ++pos)
    {
        const int rowIdx = idx ? idx->data.i[pos] : pos;
        cvGetRow(samples, &sampleRow, rowIdx);

        // Stays null when there is no missing-data mask.
        const CvMat* missingRowPtr = 0;
        if (missing)
        {
            cvGetRow(missing, &missingRow, rowIdx);
            missingRowPtr = &missingRow;
        }

        predictions[pos] = gbt->predict_serial(&sampleRow, missingRowPtr, 0, slice);
    }
} // Sample_predictor::operator()
void find_decision_boundary_GBT() { img.copyTo( imgDst ); Mat trainSamples, trainClasses; prepare_train_data( trainSamples, trainClasses ); // learn classifier CvGBTrees gbtrees; Mat var_types( 1, trainSamples.cols + 1, CV_8UC1, Scalar(CV_VAR_ORDERED) ); var_types.at<uchar>( trainSamples.cols ) = CV_VAR_CATEGORICAL; CvGBTreesParams params( CvGBTrees::DEVIANCE_LOSS, // loss_function_type 100, // weak_count 0.1f, // shrinkage 1.0f, // subsample_portion 2, // max_depth false // use_surrogates ) ); gbtrees.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), var_types, Mat(), params ); Mat testSample(1, 2, CV_32FC1 ); for( int y = 0; y < img.rows; y += testStep ) { for( int x = 0; x < img.cols; x += testStep ) { testSample.at<float>(0) = (float)x; testSample.at<float>(1) = (float)y; int response = (int)gbtrees.predict( testSample ); circle( imgDst, Point(x,y), 2, classColors[response], 1 ); } } }
int CV_GBTreesTest::checkPredictError(int test_num) { if (!gtb) return CvTS::FAIL_GENERIC; float mean[] = {5.430247f, 13.5654f, 12.6569f, 13.1661f}; float sigma[] = {0.4162694f, 3.21161f, 3.43297f, 3.00624f}; float current_error = gtb->calc_error(data, CV_TEST_ERROR); if ( abs( current_error - mean[test_num]) > 6*sigma[test_num] ) { ts->printf( CvTS::LOG, "Test error is out of range:\n" "abs(%f/*curEr*/ - %f/*mean*/ > %f/*6*sigma*/", current_error, mean[test_num], 6*sigma[test_num] ); return CvTS::FAIL_BAD_ACCURACY; } return CvTS::OK; }
int CV_GBTreesTest::TestTrainPredict(int test_num) { int code = cvtest::TS::OK; int weak_count = 200; float shrinkage = 0.1f; float subsample_portion = 0.5f; int max_depth = 5; bool use_surrogates = false; int loss_function_type = 0; switch (test_num) { case (1) : loss_function_type = CvGBTrees::SQUARED_LOSS; break; case (2) : loss_function_type = CvGBTrees::ABSOLUTE_LOSS; break; case (3) : loss_function_type = CvGBTrees::HUBER_LOSS; break; case (0) : loss_function_type = CvGBTrees::DEVIANCE_LOSS; break; default : { ts->printf( cvtest::TS::LOG, "Bad test_num value in CV_GBTreesTest::TestTrainPredict(..) function." ); return cvtest::TS::FAIL_BAD_ARG_CHECK; } } int dataset_num = test_num == 0 ? 0 : 1; if (!data) { data = new CvMLData(); data->set_delimiter(','); if (data->read_csv(datasets[dataset_num].c_str())) { ts->printf( cvtest::TS::LOG, "File reading error." ); return cvtest::TS::FAIL_INVALID_TEST_DATA; } if (test_num == 0) { data->set_response_idx(57); data->set_var_types("ord[0-56],cat[57]"); } else { data->set_response_idx(13); data->set_var_types("ord[0-2,4-13],cat[3]"); subsample_portion = 0.7f; } int train_sample_count = cvFloor(_get_len(data->get_responses())*0.5f); CvTrainTestSplit spl( train_sample_count ); data->set_train_test_split( &spl ); } data->mix_train_and_test_idx(); if (gtb) delete gtb; gtb = new CvGBTrees(); bool tmp_code = true; tmp_code = gtb->train(data, CvGBTreesParams(loss_function_type, weak_count, shrinkage, subsample_portion, max_depth, use_surrogates)); if (!tmp_code) { ts->printf( cvtest::TS::LOG, "Model training was failed."); return cvtest::TS::FAIL_INVALID_OUTPUT; } code = checkPredictError(test_num); return code; }
int main() { const int train_sample_count = 300; bool is_regression = false; const char* filename = "data/waveform.data"; int response_idx = 21; CvMLData data; CvTrainTestSplit spl( train_sample_count ); if(data.read_csv(filename) != 0) { printf("couldn't read %s\n", filename); exit(0); } data.set_response_idx(response_idx); data.change_var_type(response_idx, CV_VAR_CATEGORICAL); data.set_train_test_split( &spl ); const CvMat* values = data.get_values(); const CvMat* response = data.get_responses(); const CvMat* missing = data.get_missing(); const CvMat* var_types = data.get_var_types(); const CvMat* train_sidx = data.get_train_sample_idx(); const CvMat* var_idx = data.get_var_idx(); CvMat*response_map; CvMat*ordered_response = cv_preprocess_categories(response, var_idx, response->rows, &response_map, NULL); int num_classes = response_map->cols; CvDTree dtree; printf("======DTREE=====\n"); CvDTreeParams cvd_params( 10, 1, 0, false, 16, 0, false, false, 0); dtree.train( &data, cvd_params); print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() ); #if 0 /* boosted trees are only implemented for two classes */ printf("======BOOST=====\n"); CvBoost boost; boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0)); print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR), 0 ); #endif printf("======RTREES=====\n"); CvRTrees rtrees; rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() ); printf("======ERTREES=====\n"); CvERTrees ertrees; ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER )); print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() ); 
printf("======GBTREES=====\n"); CvGBTrees gbtrees; CvGBTreesParams gbparams; gbparams.loss_function_type = CvGBTrees::DEVIANCE_LOSS; // classification, not regression gbtrees.train( &data, gbparams); //gbt_print_error(&gbtrees, values, response, response_idx, train_sidx); print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0); printf("======KNEAREST=====\n"); CvKNearest knearest; //bool CvKNearest::train( const Mat& _train_data, const Mat& _responses, // const Mat& _sample_idx, bool _is_regression, // int _max_k, bool _update_base ) bool is_classifier = var_types->data.ptr[var_types->cols-1] == CV_VAR_CATEGORICAL; assert(is_classifier); int max_k = 10; knearest.train(values, response, train_sidx, is_regression, max_k, false); CvMat* new_response = cvCreateMat(response->rows, 1, values->type); //print_types(); //const CvMat* train_sidx = data.get_train_sample_idx(); knearest.find_nearest(values, max_k, new_response, 0, 0, 0); print_result(knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TRAIN_ERROR), knearest_calc_error(values, response, new_response, train_sidx, is_regression, CV_TEST_ERROR), 0); printf("======== RBF SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm1; CvSVMParams params1 = CvSVMParams(CvSVM::C_SVC, CvSVM::RBF, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm1.train(values, response, train_sidx, var_idx, params1); svm1.train_auto(values, response, var_idx, train_sidx, params1); svm_print_error(&svm1, values, response, response_idx, train_sidx); printf("======== Linear SVM =======\n"); CvMySVM svm2; CvSVMParams params2 = CvSVMParams(CvSVM::C_SVC, CvSVM::LINEAR, /*degree*/0, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 
1000, FLT_EPSILON)); //svm2.train(values, response, train_sidx, var_idx, params2); svm2.train_auto(values, response, var_idx, train_sidx, params2); svm_print_error(&svm2, values, response, response_idx, train_sidx); printf("======NEURONAL NETWORK=====\n"); int num_layers = 3; CvMat layers = cvMat(1, num_layers, CV_32SC1, calloc(1, sizeof(double)*num_layers*1)); cvmSetI(&layers, 0, 0, values->cols-1); cvmSetI(&layers, 0, 1, num_classes); cvmSetI(&layers, 0, 2, num_classes); CvANN_MLP ann(&layers, CvANN_MLP::SIGMOID_SYM, 0.0, 0.0); CvANN_MLP_TrainParams ann_params; //ann_params.train_method = CvANN_MLP_TrainParams::BACKPROP; CvMat ann_response = cvmat_make_boolean_class_columns(response, num_classes); CvMat values2 = cvmat_remove_column(values, response_idx); ann.train(&values2, &ann_response, NULL, train_sidx, ann_params, 0x0000); //ann.train(values, &ann_response, NULL, train_sidx, ann_params, 0x0000); ann_print_error(&ann, values, num_classes, &ann_response, response, response_idx, train_sidx); #if 0 /* slow */ printf("======== Polygonal SVM =======\n"); //printf("indexes: %d / %d, responses: %d\n", train_sidx->cols, var_idx->cols, values->rows); CvMySVM svm3; CvSVMParams params3 = CvSVMParams(CvSVM::C_SVC, CvSVM::POLY, /*degree*/2, /*gamma*/1, /*coef0*/0, /*C*/1, /*nu*/0, /*p*/0, /*class_weights*/0, cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, FLT_EPSILON)); //svm3.train(values, response, train_sidx, var_idx, params3); svm3.train_auto(values, response, var_idx, train_sidx, params3); svm_print_error(&svm3, values, response, response_idx, train_sidx); #endif return 0; }