virtual void operator()(const cv::BlockedRange& range) const
{
#ifdef HAVE_TBB
    tbb::spin_mutex::scoped_lock lock;
#endif
    CvSeqReader reader;
    int begin = range.begin();
    int end = range.end();

    int weak_count = end - begin;
    CvDTree* tree;

    for (int i=0; i<k; ++i)
    {
        float tmp_sum = 0.0f;
        if ((weak[i]) && (weak_count))
        {
            cvStartReadSeq( weak[i], &reader );
            cvSetSeqReaderPos( &reader, begin );
            for (int j=0; j<weak_count; ++j)
            {
                CV_READ_SEQ_ELEM( tree, reader );
                tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
            }
        }
#ifdef HAVE_TBB
        lock.acquire(SumMutex);
        sum[i] += tmp_sum;
        lock.release();
#else
        sum[i] += tmp_sum;
#endif
    }
} // Tree_predictor::operator()
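// A minimal dispatch sketch for the functor above (assumptions: Tree_predictor's
// constructor takes the weak-tree sequences, the number of sums k, the shrinkage
// factor, the sample, its missing-data mask, and the shared sum buffer in that
// order, and cv::parallel_for/cv::BlockedRange come from opencv2/core/internal.hpp,
// as in the OpenCV 2.x gbt.cpp this appears to be drawn from). Each worker
// accumulates the shrunken responses of its slice of weak trees locally, then
// folds them into sum[] under the spin mutex.
static void predict_parallel_sketch( pCvSeq* weak, int k, float shrinkage,
                                     const CvMat* sample, const CvMat* missing,
                                     float* sum, int begin, int end )
{
    Tree_predictor predictor( weak, k, shrinkage, sample, missing, sum );
    cv::parallel_for( cv::BlockedRange(begin, end), predictor );
}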
CvDTree* mushroom_create_dtree( const CvMat* data, const CvMat* missing,
                                const CvMat* responses, float p_weight )
{
    CvDTree* dtree;
    CvMat* var_type;
    int i, hr1 = 0, hr2 = 0, p_total = 0;
    float priors[] = { 1, p_weight };

    var_type = cvCreateMat( data->cols + 1, 1, CV_8U );
    cvSet( var_type, cvScalarAll(CV_VAR_CATEGORICAL) ); // all the variables are categorical

    dtree = new CvDTree;

    dtree->train( data, CV_ROW_SAMPLE, responses, 0, 0, var_type, missing,
                  CvDTreeParams( 8,     // max depth
                                 10,    // min sample count
                                 0,     // regression accuracy: N/A here
                                 true,  // compute surrogate split, as we have missing data
                                 15,    // max number of categories (use sub-optimal algorithm for larger numbers)
                                 10,    // the number of cross-validation folds
                                 true,  // use 1SE rule => smaller tree
                                 true,  // throw away the pruned tree branches
                                 priors // the array of priors: the bigger p_weight, the more attention
                                        // to the poisonous mushrooms
                                        // (a mushroom will be judged poisonous with bigger chance)
                                 ));

    // compute hit-rate on the training database; demonstrates predict usage.
    for( i = 0; i < data->rows; i++ )
    {
        CvMat sample, mask;
        cvGetRow( data, &sample, i );
        cvGetRow( missing, &mask, i );
        double r = dtree->predict( &sample, &mask )->value;
        int d = fabs(r - responses->data.fl[i]) >= FLT_EPSILON;
        if( d )
        {
            if( r != 'p' )
                hr1++;
            else
                hr2++;
        }
        p_total += responses->data.fl[i] == 'p';
    }

    printf( "Results on the training database:\n"
            "\tPoisonous mushrooms mis-predicted: %d (%g%%)\n"
            "\tFalse-alarms: %d (%g%%)\n", hr1, (double)hr1*100/p_total,
            hr2, (double)hr2*100/(data->rows - p_total) );

    cvReleaseMat( &var_type );
    return dtree;
}
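// A minimal usage sketch (assumptions: mushroom_read_database() is the loader
// from the same OpenCV mushroom sample, filling data/missing/responses from the
// UCI agaricus-lepiota.data file; the weight 10.0f is illustrative). A larger
// p_weight biases the priors toward flagging poisonous mushrooms, trading more
// false alarms for fewer missed poisonous ones.
static int mushroom_demo_sketch()
{
    CvMat *data = 0, *missing = 0, *responses = 0;
    if( !mushroom_read_database( "agaricus-lepiota.data",
                                 &data, &missing, &responses ) )
        return -1;

    CvDTree* dtree = mushroom_create_dtree( data, missing, responses, 10.0f );

    cvReleaseMat( &data );
    cvReleaseMat( &missing );
    cvReleaseMat( &responses );
    delete dtree;
    return 0;
}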
void Model::Predict_tree( const SampleSet& samples, SampleSet& outError )
{
    int true_resp = 0;
    CvDTree* model = (CvDTree*)m_pModel;
    for (int i = 0; i < samples.N(); i++)
    {
        CvDTreeNode* pnode = model->predict(samples.GetSampleAt(i), cv::Mat());
        if (pnode->value != samples.GetLabelAt(i))
        {
            // misclassified: collect the sample for error analysis
            outError.Add(samples.GetSampleAt(i), samples.GetLabelAt(i));
        }
        else
        {
            true_resp++;
        }
    }
    printf("%d %d\n", samples.N(), true_resp);
}
// Decision Tree
void decisiontree( Mat& trainingData, Mat& trainingClasses,
                   Mat& testData, Mat& testClasses )
{
    CvDTree dtree;

    Mat var_type(3, 1, CV_8U);
    // define the two attributes as numerical
    var_type.at<uchar>(0) = CV_VAR_NUMERICAL;
    var_type.at<uchar>(1) = CV_VAR_NUMERICAL;
    // define the output node as numerical
    var_type.at<uchar>(2) = CV_VAR_NUMERICAL;

    dtree.train(trainingData, CV_ROW_SAMPLE, trainingClasses, Mat(), Mat(),
                var_type, Mat(), CvDTreeParams());

    Mat predicted(testClasses.rows, 1, CV_32F);
    for (int i = 0; i < testData.rows; i++)
    {
        const Mat sample = testData.row(i);
        CvDTreeNode* prediction = dtree.predict(sample);
        predicted.at<float>(i, 0) = (float)prediction->value;
    }

    cout << "Accuracy_{TREE} = " << evaluate(predicted, testClasses) << endl;
    plot_binary(testData, predicted, "Predictions tree");
}
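// A minimal call sketch (assumptions: evaluate() and plot_binary() are the
// helper routines defined elsewhere in this tutorial code; the toy data below
// is illustrative). Two numeric features per row and one numeric response, so
// the var_type layout above matches.
void decisiontree_demo_sketch()
{
    Mat trainingData(100, 2, CV_32FC1), trainingClasses(100, 1, CV_32FC1);
    Mat testData(40, 2, CV_32FC1), testClasses(40, 1, CV_32FC1);

    RNG rng(12345);
    rng.fill(trainingData, RNG::UNIFORM, Scalar(0), Scalar(1));
    rng.fill(testData, RNG::UNIFORM, Scalar(0), Scalar(1));

    // label = 1 when the two features sum to more than 1, else 0
    for (int i = 0; i < trainingData.rows; i++)
        trainingClasses.at<float>(i, 0) =
            (trainingData.at<float>(i, 0) + trainingData.at<float>(i, 1) > 1.f) ? 1.f : 0.f;
    for (int i = 0; i < testData.rows; i++)
        testClasses.at<float>(i, 0) =
            (testData.at<float>(i, 0) + testData.at<float>(i, 1) > 1.f) ? 1.f : 0.f;

    decisiontree(trainingData, trainingClasses, testData, testClasses);
}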
static void find_decision_boundary_DT()
{
    img.copyTo( imgDst );

    Mat trainSamples, trainClasses;
    prepare_train_data( trainSamples, trainClasses );

    // learn classifier
    CvDTree dtree;

    Mat var_types( 1, trainSamples.cols + 1, CV_8UC1, Scalar(CV_VAR_ORDERED) );
    var_types.at<uchar>( trainSamples.cols ) = CV_VAR_CATEGORICAL;

    CvDTreeParams params;
    params.max_depth = 8;
    params.min_sample_count = 2;
    params.use_surrogates = false;
    params.cv_folds = 0; // the number of cross-validation folds
    params.use_1se_rule = false;
    params.truncate_pruned_tree = false;

    dtree.train( trainSamples, CV_ROW_SAMPLE, trainClasses,
                 Mat(), Mat(), var_types, Mat(), params );

    Mat testSample( 1, 2, CV_32FC1 );
    for( int y = 0; y < img.rows; y += testStep )
    {
        for( int x = 0; x < img.cols; x += testStep )
        {
            testSample.at<float>(0) = (float)x;
            testSample.at<float>(1) = (float)y;

            int response = (int)dtree.predict( testSample )->value;
            circle( imgDst, Point(x,y), 2, classColors[response], 1 );
        }
    }
}
int main( int argc, char** argv )
{
    Mat img;
    char file[255];

    // total no of training samples
    int total_train_samples = 0;
    for(int cl=0; cl<nr_classes; cl++)
    {
        total_train_samples = total_train_samples + train_samples[cl];
    }

    // training data
    Mat training_data = Mat(total_train_samples, feature_size, CV_32FC1);
    Mat training_label = Mat(total_train_samples, 1, CV_32FC1);

    // training data .csv file
    ofstream trainingDataCSV;
    trainingDataCSV.open("./training_data.csv");

    int index = 0;
    for(int cl=0; cl<nr_classes; cl++)
    {
        for(int ll=0; ll<train_samples[cl]; ll++)
        {
            // assign sample label
            training_label.at<float>(index+ll,0) = class_labels[cl];

            // image feature extraction
            sprintf(file, "%s/%d/%d.png", pathToImages, class_labels[cl], ll);
            img = imread(file, 1);
            if (!img.data)
            {
                cout << "File " << file << " not found\n";
                exit(1);
            }
            imshow("sample", img);
            waitKey(1);

            // calculate feature vector
            vector<float> feature = ColorHistFeature(img);
            for(int ft=0; ft<(int)feature.size(); ft++)
            {
                training_data.at<float>(index+ll,ft) = feature[ft];
                trainingDataCSV << feature[ft] << ",";
            }
            trainingDataCSV << class_labels[cl] << "\n";
        }
        index = index + train_samples[cl];
    }
    trainingDataCSV.close();

    /// Decision Tree
    // training
    float* priors = NULL;
    CvDTreeParams DTParams = CvDTreeParams(25,    // max depth
                                           5,     // min sample count
                                           0,     // regression accuracy: N/A here
                                           false, // compute surrogate split, no missing data
                                           15,    // max number of categories (use sub-optimal algorithm for larger numbers)
                                           15,    // the number of cross-validation folds
                                           false, // use 1SE rule => smaller tree
                                           false, // throw away the pruned tree branches
                                           priors // the array of priors
                                           );
    CvDTree DTree;
    DTree.train(training_data, CV_ROW_SAMPLE, training_label,
                Mat(), Mat(), Mat(), Mat(), DTParams);

    // save model
    DTree.save("training.model");

    // load model
    CvDTree DT;
    DT.load("training.model");

    // test on sample image
    string filename = string(pathToImages) + "/test.png";
    Mat test_img = imread(filename.c_str());
    vector<float> test_feature = ColorHistFeature(test_img);
    CvDTreeNode* result_node = DT.predict(Mat(test_feature), Mat(), false);
    double predictedClass = result_node->value;
    cout << "predictedClass " << predictedClass << "\n";

    /*
    // CvMLData for calculating error
    CvMLData* MLData;
    MLData = new CvMLData();
    MLData->read_csv("training_data.csv");
    MLData->set_response_idx(feature_size);
    // MLData->change_var_type(feature_size, CV_VAR_CATEGORICAL);

    // calculate training error
    float error = DT.calc_error(MLData, CV_TRAIN_ERROR, 0);
    cout << "training error " << error << "\n";
    */

    return 0;
}
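// ColorHistFeature() is used above but not shown. A minimal sketch of what
// such a helper might compute (assumption: an 8-bin L1-normalized histogram
// per BGR channel, concatenated into a 24-element vector; the real
// feature_size and binning may differ).
vector<float> ColorHistFeature_sketch(const Mat& img)
{
    vector<float> feature;
    vector<Mat> channels;
    split(img, channels); // separate B, G, R planes

    int histSize = 8;
    float range[] = { 0, 256 };
    const float* histRange = { range };

    for (size_t c = 0; c < channels.size(); c++)
    {
        Mat hist;
        calcHist(&channels[c], 1, 0, Mat(), hist, 1, &histSize, &histRange);
        normalize(hist, hist, 1.0, 0.0, NORM_L1); // bins sum to 1
        for (int b = 0; b < histSize; b++)
            feature.push_back(hist.at<float>(b));
    }
    return feature;
}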
float
CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
                           CvMat* weak_responses, CvSlice slice, int k) const
{
    float result = 0.0f;

    if (!weak) return 0.0f;

    CvSeqReader reader;
    int weak_count = cvSliceLength( slice, weak[class_count-1] );
    CvDTree* tree;

    if (weak_responses)
    {
        if (CV_MAT_TYPE(weak_responses->type) != CV_32F)
            return 0.0f;
        if ((k >= 0) && (k<class_count) && (weak_responses->rows != 1))
            return 0.0f;
        if ((k == -1) && (weak_responses->rows != class_count))
            return 0.0f;
        if (weak_responses->cols != weak_count)
            return 0.0f;
    }

    float* sum = new float[class_count];
    memset(sum, 0, class_count*sizeof(float));

    for (int i=0; i<class_count; ++i)
    {
        if ((weak[i]) && (weak_count))
        {
            cvStartReadSeq( weak[i], &reader );
            cvSetSeqReaderPos( &reader, slice.start_index );
            for (int j=0; j<weak_count; ++j)
            {
                CV_READ_SEQ_ELEM( tree, reader );
                float p = (float)(tree->predict(_sample, _missing)->value);
                sum[i] += params.shrinkage * p;
                if (weak_responses)
                    weak_responses->data.fl[i*weak_count+j] = p;
            }
        }
    }

    for (int i=0; i<class_count; ++i)
        sum[i] += base_value;

    if (class_count == 1)
    {
        result = sum[0];
        delete[] sum;
        return result;
    }

    if ((k>=0) && (k<class_count))
    {
        result = sum[k];
        delete[] sum;
        return result;
    }

    float max = sum[0];
    int class_label = 0;
    for (int i=1; i<class_count; ++i)
        if (sum[i] > max)
        {
            max = sum[i];
            class_label = i;
        }

    delete[] sum;

    /*
    int orig_class_label = -1;
    for (int i=0; i<get_len(class_labels); ++i)
        if (class_labels->data.i[i] == class_label+1)
            orig_class_label = i;
    */
    int orig_class_label = class_labels->data.i[class_label];

    return float(orig_class_label);
}
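// A minimal caller sketch (assumptions: gbt is an already trained CvGBTrees;
// the public CvGBTrees::predict() defaults missing=0, weak_responses=0,
// slice=CV_WHOLE_SEQ, k=-1 and falls back to the serial path above when no
// parallel backend is available). With k = -1 the return value is the
// predicted class label (or the regression value); passing k in
// [0, class_count) returns the raw ensemble sum for that class instead.
static float gbt_predict_sketch( const CvGBTrees& gbt, const CvMat* sample )
{
    return gbt.predict( sample /* remaining arguments left at their defaults */ );
}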
bool
CvGBTrees::train( const CvMat* _train_data, int _tflag,
                  const CvMat* _responses, const CvMat* _var_idx,
                  const CvMat* _sample_idx, const CvMat* _var_type,
                  const CvMat* _missing_mask,
                  CvGBTreesParams _params, bool /*_update*/ ) // update is not supported
{
    CvMemStorage* storage = 0;

    params = _params;
    bool is_regression = problem_type();

    clear();
    /*
      n - count of samples
      m - count of variables
    */
    int n = _train_data->rows;
    int m = _train_data->cols;
    if (_tflag != CV_ROW_SAMPLE)
    {
        int tmp;
        CV_SWAP(n,m,tmp);
    }

    CvMat* new_responses = cvCreateMat( n, 1, CV_32F);
    cvZero(new_responses);

    data = new CvDTreeTrainData( _train_data, _tflag, new_responses, _var_idx,
        _sample_idx, _var_type, _missing_mask, _params, true, true );
    if (_missing_mask)
    {
        missing = cvCreateMat(_missing_mask->rows, _missing_mask->cols,
                              _missing_mask->type);
        cvCopy( _missing_mask, missing);
    }

    orig_response = cvCreateMat( 1, n, CV_32F );
    int step = (_responses->cols > _responses->rows) ? 1
             : _responses->step / CV_ELEM_SIZE(_responses->type);
    switch (CV_MAT_TYPE(_responses->type))
    {
        case CV_32FC1:
        {
            for (int i=0; i<n; ++i)
                orig_response->data.fl[i] = _responses->data.fl[i*step];
        }; break;
        case CV_32SC1:
        {
            for (int i=0; i<n; ++i)
                orig_response->data.fl[i] = (float) _responses->data.i[i*step];
        }; break;
        default:
            CV_Error(CV_StsUnmatchedFormats,
                     "Response should be a 32fC1 or 32sC1 vector.");
    }

    if (!is_regression)
    {
        class_count = 0;
        unsigned char* mask = new unsigned char[n];
        memset(mask, 0, n);
        // compute the count of different output classes
        for (int i=0; i<n; ++i)
            if (!mask[i])
            {
                class_count++;
                for (int j=i; j<n; ++j)
                    if (int(orig_response->data.fl[j]) ==
                        int(orig_response->data.fl[i]))
                        mask[j] = 1;
            }
        delete[] mask;

        class_labels = cvCreateMat(1, class_count, CV_32S);
        class_labels->data.i[0] = int(orig_response->data.fl[0]);
        int j = 1;
        for (int i=1; i<n; ++i)
        {
            int k = 0;
            while ((int(orig_response->data.fl[i]) - class_labels->data.i[k])
                   && (k<j))
                k++;
            if (k == j)
            {
                class_labels->data.i[k] = int(orig_response->data.fl[i]);
                j++;
            }
        }
    }

    // inside the gbt learning process only regression decision trees are built
    data->is_classifier = false;

    // preprocessing sample indices
    if (_sample_idx)
    {
        int sample_idx_len = get_len(_sample_idx);

        switch (CV_MAT_TYPE(_sample_idx->type))
        {
            case CV_32SC1:
            {
                sample_idx = cvCreateMat( 1, sample_idx_len, CV_32S );
                for (int i=0; i<sample_idx_len; ++i)
                    sample_idx->data.i[i] = _sample_idx->data.i[i];
            } break;
            case CV_8S:
            case CV_8U:
            {
                int active_samples_count = 0;
                for (int i=0; i<sample_idx_len; ++i)
                    active_samples_count += int( _sample_idx->data.ptr[i] );
                sample_idx = cvCreateMat( 1, active_samples_count, CV_32S );
                active_samples_count = 0;
                for (int i=0; i<sample_idx_len; ++i)
                    if (int( _sample_idx->data.ptr[i] ))
                        sample_idx->data.i[active_samples_count++] = i;
            } break;
            default:
                CV_Error(CV_StsUnmatchedFormats,
                         "_sample_idx should be a 32sC1, 8sC1 or 8uC1 vector.");
        }
        icvSortFloat(sample_idx->data.fl, sample_idx_len, 0);
    }
    else
    {
        sample_idx = cvCreateMat( 1, n, CV_32S );
        for (int i=0; i<n; ++i)
            sample_idx->data.i[i] = i;
    }

    sum_response = cvCreateMat(class_count, n, CV_32F);
    sum_response_tmp = cvCreateMat(class_count, n, CV_32F);
    cvZero(sum_response);

    delta = 0.0f;
    /*
      in the case of a regression problem the initial guess (the zero term
      in the sum) is set to the mean of all the training responses, that is
      the best constant model
    */
    if (is_regression)
        base_value = find_optimal_value(sample_idx);
    /*
      in the case of a classification problem the initial guess (the zero term
      in the sum) is set to zero for all the trees sequences
    */
    else
        base_value = 0.0f;
    /*
      current prediction on all training samples is set to be
      equal to the base_value
    */
    cvSet( sum_response, cvScalar(base_value) );

    weak = new pCvSeq[class_count];
    for (int i=0; i<class_count; ++i)
    {
        storage = cvCreateMemStorage();
        weak[i] = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvDTree*), storage );
        storage = 0;
    }

    // subsample params and data
    rng = &cv::theRNG();

    int samples_count = get_len(sample_idx);

    params.subsample_portion = params.subsample_portion <= FLT_EPSILON ||
        1 - params.subsample_portion <= FLT_EPSILON
        ? 1 : params.subsample_portion;
    int train_sample_count = cvFloor(params.subsample_portion * samples_count);
    if (train_sample_count == 0)
        train_sample_count = samples_count;
    int test_sample_count = samples_count - train_sample_count;
    int* idx_data = new int[samples_count];
    subsample_train = cvCreateMatHeader( 1, train_sample_count, CV_32SC1 );
    *subsample_train = cvMat( 1, train_sample_count, CV_32SC1, idx_data );
    if (test_sample_count)
    {
        subsample_test = cvCreateMatHeader( 1, test_sample_count, CV_32SC1 );
        *subsample_test = cvMat( 1, test_sample_count, CV_32SC1,
                                 idx_data + train_sample_count );
    }

    // training procedure
    for ( int i=0; i < params.weak_count; ++i )
    {
        do_subsample();
        for ( int k=0; k < class_count; ++k )
        {
            find_gradient(k);
            CvDTree* tree = new CvDTree;
            tree->train( data, subsample_train );
            change_values(tree, k);

            if (subsample_test)
            {
                CvMat x;
                CvMat x_miss;
                int* sample_data = sample_idx->data.i;
                int* subsample_data = subsample_test->data.i;
                int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                           : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
                for (int j=0; j<get_len(subsample_test); ++j)
                {
                    int idx = *(sample_data + subsample_data[j]*s_step);
                    float res = 0.0f;
                    if (_tflag == CV_ROW_SAMPLE)
                        cvGetRow( data->train_data, &x, idx);
                    else
                        cvGetCol( data->train_data, &x, idx);

                    if (missing)
                    {
                        if (_tflag == CV_ROW_SAMPLE)
                            cvGetRow( missing, &x_miss, idx);
                        else
                            cvGetCol( missing, &x_miss, idx);

                        res = (float)tree->predict(&x, &x_miss)->value;
                    }
                    else
                    {
                        res = (float)tree->predict(&x)->value;
                    }
                    sum_response_tmp->data.fl[idx + k*n] =
                        sum_response->data.fl[idx + k*n] +
                        params.shrinkage * res;
                }
            }

            cvSeqPush( weak[k], &tree );
            tree = 0;
        } // k=0..class_count

        CvMat* tmp;
        tmp = sum_response_tmp;
        sum_response_tmp = sum_response;
        sum_response = tmp;
        tmp = 0;
    } // i=0..params.weak_count

    delete[] idx_data;
    cvReleaseMat(&new_responses);
    data->free_train_data();

    return true;
} // CvGBTrees::train(...)
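// A minimal training sketch (assumptions: the CvGBTreesParams constructor
// order is (loss_function_type, weak_count, shrinkage, subsample_portion,
// max_depth, use_surrogates), as in OpenCV 2.x ml.hpp; trainData/responses
// are illustrative CV_32FC1 matrices with one sample per row, matching the
// CV_ROW_SAMPLE layout handled above).
static void gbt_train_sketch( const cv::Mat& trainData, const cv::Mat& responses )
{
    CvGBTrees gbt;
    CvGBTreesParams params( CvGBTrees::SQUARED_LOSS, // regression loss
                            100,   // weak_count: boosting iterations
                            0.1f,  // shrinkage
                            0.8f,  // subsample_portion
                            3,     // max_depth of each regression tree
                            false  // use_surrogates
                            );
    gbt.train( trainData, CV_ROW_SAMPLE, responses,
               cv::Mat(), cv::Mat(), cv::Mat(), cv::Mat(), params );
}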