vector<Mat*> predictRealImages(vector<Mat*> imageVector, vector<CvRTrees*> forestVector1, vector<CvRTrees*> forestVector2,
                               int imNum, int imageWidth, int imageHeight, int tileSizeX, int tileSizeY, int overlap,
                               int numOfTrees, double decisionThres1, double decisionThres2,
                               int numOfPointPairs1, int numOfPointPairs2, int numOfPointPairs3,
                               string charType, string featureType, bool useNoise)
{
    printf("Detecting characters in images....\n\n");

    DWORD start, stop;
    Scalar mean, std;
    int xPos, yPos, predPosx, predPosy, rectFiltNum;
    int overlapTileX = tileSizeX/overlap;
    int overlapTileY = tileSizeY/overlap;
    Mat imRect, imRectReSize, imRectThres, integralRect, thresholdedImage, imRectThresholded;
    Mat featureMat1;
    Mat featureMat2 = Mat::zeros(1, numOfPointPairs2, CV_32FC1);
    Mat featureMat3 = Mat::zeros(1, numOfPointPairs3, CV_32FC1); // prepared but not used in the loop below
    vector<Mat*> predictions;
    Mat* pred;

    // Number of tile positions per row/column when stepping by tileSize/overlap
    int tileNumX = imageWidth/(tileSizeX/overlap) - 2;
    int tileNumY = imageHeight/(tileSizeY/overlap) - 2;

    if(featureType == "rects")
    {
        rectFiltNum = calcRectFiltNum(tileSizeX, tileSizeY) + 1;
        featureMat1 = Mat::zeros(1, rectFiltNum, CV_32FC1);
    }
    else if(featureType == "points")
    {
        featureMat1 = Mat::zeros(1, numOfPointPairs1, CV_32FC1);
    }

    Mat treePred;
    CvForestTree* tree;
    double minVal, maxVal;
    int minIndx[2] = {0,0};
    int maxIndx[2] = {0,0};
    int numOfForests = (int)forestVector1.size();

    Mat pointPairVector1 = Mat::zeros(numOfPointPairs1, 4, CV_32SC1);
    Mat pointPairVector2 = Mat::zeros(numOfPointPairs2, 4, CV_32SC1);
    Mat pointPairVector3 = Mat::zeros(numOfPointPairs3, 4, CV_32SC1);

    // Sample random point pairs for the character classifier. The first point is
    // drawn from the central half of the tile; pairs closer than distThreshold
    // in both x and y are rejected and redrawn.
    if(featureType == "points")
    {
        cv::RNG rng(0);
        int distThreshold = 10;
        int x1, x2, y1, y2;
        for(int i=0; i<numOfPointPairs1; i++)
        {
            x1 = 0; y1 = 0; x2 = 0; y2 = 0;
            while(abs(x1-x2) < distThreshold && abs(y1-y2) < distThreshold)
            {
                x1 = rng.uniform(tileSizeX/4, tileSizeX*3/4);
                y1 = rng.uniform(tileSizeY/4, tileSizeY*3/4);
                x2 = rng.uniform(0, tileSizeX);
                y2 = rng.uniform(0, tileSizeY);
            }
            pointPairVector1.at<int>(i,0) = x1;
            pointPairVector1.at<int>(i,1) = y1;
            pointPairVector1.at<int>(i,2) = x2;
            pointPairVector1.at<int>(i,3) = y2;
        }
    }

    // Point pairs for the coarser stages: coordinates are scaled down to an
    // 8x8 grid (pointPairVector2) and a 16x16 grid (pointPairVector3).
    RNG rng(0);
    int distThreshold = 10;
    int x1, x2, y1, y2;
    for(int i=0; i<numOfPointPairs2; i++)
    {
        x1 = 0; y1 = 0; x2 = 0; y2 = 0;
        while(abs(x1-x2) < distThreshold && abs(y1-y2) < distThreshold)
        {
            x1 = rng.uniform(tileSizeX/4, tileSizeX*3/4);
            y1 = rng.uniform(tileSizeY/4, tileSizeY*3/4);
            x2 = rng.uniform(0, tileSizeX);
            y2 = rng.uniform(0, tileSizeY);
        }
        pointPairVector2.at<int>(i,0) = x1/(tileSizeX/8);
        pointPairVector2.at<int>(i,1) = y1/(tileSizeY/8);
        pointPairVector2.at<int>(i,2) = x2/(tileSizeX/8);
        pointPairVector2.at<int>(i,3) = y2/(tileSizeY/8);
    }
    for(int i=0; i<numOfPointPairs3; i++)
    {
        x1 = 0; y1 = 0; x2 = 0; y2 = 0;
        while(abs(x1-x2) < distThreshold && abs(y1-y2) < distThreshold)
        {
            x1 = rng.uniform(tileSizeX/4, tileSizeX*3/4);
            y1 = rng.uniform(tileSizeY/4, tileSizeY*3/4);
            x2 = rng.uniform(0, tileSizeX);
            y2 = rng.uniform(0, tileSizeY);
        }
        pointPairVector3.at<int>(i,0) = x1/(tileSizeX/16);
        pointPairVector3.at<int>(i,1) = y1/(tileSizeY/16);
        pointPairVector3.at<int>(i,2) = x2/(tileSizeX/16);
        pointPairVector3.at<int>(i,3) = y2/(tileSizeY/16);
    }

    start = GetTickCount();
    for(int im=0; im<imNum; im++)
    {
        pred = new Mat(Mat::zeros(tileNumY, tileNumX, CV_8UC1));
        cv::adaptiveThreshold(*imageVector[im], thresholdedImage, 255,
                              ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, tileSizeX+1, 20);

        xPos = 0;
        yPos = 0;
        predPosx = 0;
        predPosy = 0;

        // Slide a tileSizeX x tileSizeY window over the image in steps of tileSize/overlap.
        while(yPos < imageHeight - tileSizeY)
        {
            while(xPos < imageWidth - tileSizeX)
            {
                imRect = (*imageVector[im])(Rect(xPos, yPos, tileSizeX, tileSizeY));
                resize(imRect, imRectReSize, Size(8,8));

                // Stage 1: cheap text/no-text classifier on the 8x8 resized tile.
                featureMat2 = Mat::zeros(1, numOfPointPairs2, CV_32FC1);
                calcPointPairsFeaturesTile(imRectReSize, featureMat2, pointPairVector2, numOfPointPairs2, 0, false);

                imRectThresholded = thresholdedImage(Rect(xPos, yPos, tileSizeX, tileSizeY));
                meanStdDev(imRectThresholded, mean, std); // mean/std of the thresholded tile (currently unused)

                treePred = Mat::zeros(2, 1, CV_32SC1);
                for(int t=0; t<forestVector2[0]->get_tree_count(); t++)
                {
                    tree = forestVector2[0]->get_tree(t);
                    treePred.at<int>(static_cast<int>(tree->predict(featureMat2)->value), 0)++;
                }

                // Stage 2: only tiles accepted by the first stage reach the
                // (more expensive) character classifier.
                if(treePred.at<int>(1,0) > decisionThres2*forestVector2[0]->get_tree_count())
                {
                    if(featureType == "rects")
                        calcRectFeatureTile(imRect, featureMat1, tileSizeX, tileSizeY, 0);
                    else if(featureType == "points")
                    {
                        featureMat1 = Mat::zeros(1, numOfPointPairs1, CV_32FC1);
                        calcPointPairsFeaturesTile(imRectThresholded, featureMat1, pointPairVector1, numOfPointPairs1, 0, useNoise);
                    }
                    else
                        abort();

                    // Accumulate the votes of every tree in every forest.
                    treePred = Mat::zeros(256, 1, CV_32SC1);
                    for(int f=0; f<numOfForests; f++)
                    {
                        for(int t=0; t<forestVector1[f]->get_tree_count(); t++)
                        {
                            tree = forestVector1[f]->get_tree(t);
                            treePred.at<int>(static_cast<int>(tree->predict(featureMat1)->value), 0)++;
                        }
                    }
                    cv::minMaxIdx(treePred, &minVal, &maxVal, minIndx, maxIndx);

                    // Accept the majority class only if it gathered enough votes.
                    if(maxVal > numOfTrees*numOfForests*decisionThres1)
                        pred->at<uchar>(predPosy, predPosx) = *maxIndx;
                }
                predPosx++;
                xPos += overlapTileX;
            }
            xPos = 0;
            yPos += overlapTileY;
            predPosx = 0;
            predPosy++;
        }
        removeFalsePredictions(*pred);
        predictions.push_back(pred);
    }
    stop = GetTickCount();

    std::cout << "Average time per image: " << (float)(stop - start)/((float)imNum)/1000 << std::endl << std::endl;
    return predictions;
}
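For reference, a minimal sketch of how `predictRealImages` might be driven. `loadImages` and `loadForests` are hypothetical helpers standing in for the project's actual loading code, and every numeric argument is illustrative rather than a value taken from the original experiments.

#include <opencv2/opencv.hpp>
#include <opencv2/ml/ml.hpp>
#include <vector>
#include <string>
using namespace cv;
using namespace std;

// Hypothetical helpers - not part of this file.
vector<Mat*> loadImages(const string& dir);          // grayscale CV_8UC1 images
vector<CvRTrees*> loadForests(const string& prefix); // trained forests from disk

void runDetectionExample()
{
    vector<Mat*> images = loadImages("images/");
    vector<CvRTrees*> charForests = loadForests("char"); // per-character classifiers
    vector<CvRTrees*> textForests = loadForests("text"); // text/no-text pre-filter

    // Illustrative setup: 640x480 images, 32x32 tiles with 2x overlap,
    // 100 trees per forest, both vote-fraction thresholds at 0.5,
    // and 200/100/100 random point pairs for the three feature sets.
    vector<Mat*> predictions = predictRealImages(
        images, charForests, textForests,
        (int)images.size(), 640, 480,
        32, 32, 2,
        100, 0.5, 0.5,
        200, 100, 100,
        "digits", "points", false);

    // Each returned Mat holds one tile-grid of predicted character codes;
    // the caller owns the heap-allocated results.
    for (size_t i = 0; i < predictions.size(); i++)
        delete predictions[i];
}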
bool CvRTrees::grow_forest( const CvTermCriteria term_crit )
{
    CvMat* sample_idx_mask_for_tree = 0;
    CvMat* sample_idx_for_tree      = 0;

    const int max_ntrees = term_crit.max_iter;
    const double max_oob_err = term_crit.epsilon;

    const int dims = data->var_count;
    float maximal_response = 0;

    CvMat* oob_sample_votes = 0;
    CvMat* oob_responses    = 0;

    float* oob_samples_perm_ptr = 0;
    float* samples_ptr   = 0;
    uchar* missing_ptr   = 0;
    float* true_resp_ptr = 0;
    bool is_oob_or_vimportance = (max_oob_err > 0 && term_crit.type != CV_TERMCRIT_ITER) || var_importance;

    // oob_predictions_sum[i] = sum of predicted values for the i-th sample
    // oob_num_of_predictions[i] = number of summands
    //     (number of predictions for the i-th sample)
    // initialize these variables to avoid warning C4701
    CvMat oob_predictions_sum    = cvMat( 1, 1, CV_32FC1 );
    CvMat oob_num_of_predictions = cvMat( 1, 1, CV_32FC1 );

    nsamples = data->sample_count;
    nclasses = data->get_num_classes();

    if ( is_oob_or_vimportance )
    {
        if( data->is_classifier )
        {
            oob_sample_votes = cvCreateMat( nsamples, nclasses, CV_32SC1 );
            cvZero(oob_sample_votes);
        }
        else
        {
            // oob_responses[0,i] = oob_predictions_sum[i]
            //     = sum of predicted values for the i-th sample
            // oob_responses[1,i] = oob_num_of_predictions[i]
            //     = number of summands (number of predictions for the i-th sample)
            oob_responses = cvCreateMat( 2, nsamples, CV_32FC1 );
            cvZero(oob_responses);
            cvGetRow( oob_responses, &oob_predictions_sum, 0 );
            cvGetRow( oob_responses, &oob_num_of_predictions, 1 );
        }

        oob_samples_perm_ptr = (float*)cvAlloc( sizeof(float)*nsamples*dims );
        samples_ptr   = (float*)cvAlloc( sizeof(float)*nsamples*dims );
        missing_ptr   = (uchar*)cvAlloc( sizeof(uchar)*nsamples*dims );
        true_resp_ptr = (float*)cvAlloc( sizeof(float)*nsamples );

        data->get_vectors( 0, samples_ptr, missing_ptr, true_resp_ptr );

        double minval, maxval;
        CvMat responses = cvMat(1, nsamples, CV_32FC1, true_resp_ptr);
        cvMinMaxLoc( &responses, &minval, &maxval );
        maximal_response = (float)MAX( MAX( fabs(minval), fabs(maxval) ), 0 );
    }

    trees = (CvForestTree**)cvAlloc( sizeof(trees[0])*max_ntrees );
    memset( trees, 0, sizeof(trees[0])*max_ntrees );

    sample_idx_mask_for_tree = cvCreateMat( 1, nsamples, CV_8UC1 );
    sample_idx_for_tree      = cvCreateMat( 1, nsamples, CV_32SC1 );

    ntrees = 0;
    while( ntrees < max_ntrees )
    {
        int i, oob_samples_count = 0;
        double ncorrect_responses = 0; // used for estimation of variable importance
        CvForestTree* tree = 0;

        cvZero( sample_idx_mask_for_tree );
        for( i = 0; i < nsamples; i++ ) // draw a bootstrap sample (with replacement) for one tree
        {
            int idx = cvRandInt( &rng ) % nsamples;
            sample_idx_for_tree->data.i[i] = idx;
            sample_idx_mask_for_tree->data.ptr[idx] = 0xFF;
        }

        trees[ntrees] = new CvForestTree();
        tree = trees[ntrees];
        tree->train( data, sample_idx_for_tree, this );

        if ( is_oob_or_vimportance )
        {
            CvMat sample, missing;
            // form array of OOB samples indices and get these samples
            sample  = cvMat( 1, dims, CV_32FC1, samples_ptr );
            missing = cvMat( 1, dims, CV_8UC1,  missing_ptr );

            oob_error = 0;
            for( i = 0; i < nsamples; i++, sample.data.fl += dims, missing.data.ptr += dims )
            {
                // skip in-bag samples; only out-of-bag samples contribute to the error
                if( sample_idx_mask_for_tree->data.ptr[i] )
                    continue;

                // predict the oob sample
                CvDTreeNode* predicted_node = tree->predict(&sample, &missing, true);

                if( !data->is_classifier ) // regression
                {
                    double avg_resp, resp = predicted_node->value;
                    oob_predictions_sum.data.fl[i] += (float)resp;
                    oob_num_of_predictions.data.fl[i] += 1;

                    // compute oob error
                    avg_resp = oob_predictions_sum.data.fl[i]/oob_num_of_predictions.data.fl[i];
                    avg_resp -= true_resp_ptr[i];
                    oob_error += avg_resp*avg_resp;
                    resp = (resp - true_resp_ptr[i])/maximal_response;
                    ncorrect_responses += exp( -resp*resp );
                }
                else // classification
                {
                    double prdct_resp;
                    CvPoint max_loc;
                    CvMat votes;

                    cvGetRow(oob_sample_votes, &votes, i);
                    votes.data.i[predicted_node->class_idx]++;

                    // compute oob error
                    cvMinMaxLoc( &votes, 0, 0, 0, &max_loc );
                    prdct_resp = data->cat_map->data.i[max_loc.x];
                    oob_error += (fabs(prdct_resp - true_resp_ptr[i]) < FLT_EPSILON) ? 0 : 1;

                    ncorrect_responses += cvRound(predicted_node->value - true_resp_ptr[i]) == 0;
                }
                oob_samples_count++;
            }
            if( oob_samples_count > 0 )
                oob_error /= (double)oob_samples_count;

            // estimate variable importance
            if( var_importance && oob_samples_count > 0 )
            {
                int m;

                memcpy( oob_samples_perm_ptr, samples_ptr, dims*nsamples*sizeof(float));
                for( m = 0; m < dims; m++ )
                {
                    double ncorrect_responses_permuted = 0;

                    // randomly permute values of the m-th variable in the oob samples
                    float* mth_var_ptr = oob_samples_perm_ptr + m;
                    for( i = 0; i < nsamples; i++ )
                    {
                        int i1, i2;
                        float temp;

                        if( sample_idx_mask_for_tree->data.ptr[i] ) // the sample is not OOB
                            continue;
                        i1 = cvRandInt( &rng ) % nsamples;
                        i2 = cvRandInt( &rng ) % nsamples;
                        CV_SWAP( mth_var_ptr[i1*dims], mth_var_ptr[i2*dims], temp );

                        // restore the values of the (m-1)-th variable that were
                        // permuted at the previous iteration
                        if( m > 1 )
                            oob_samples_perm_ptr[i*dims+m-1] = samples_ptr[i*dims+m-1];
                    }

                    // predict "permuted" cases and calculate the number of votes for the
                    // correct class in the variable-m-permuted oob data
                    sample  = cvMat( 1, dims, CV_32FC1, oob_samples_perm_ptr );
                    missing = cvMat( 1, dims, CV_8UC1,  missing_ptr );
                    for( i = 0; i < nsamples; i++, sample.data.fl += dims, missing.data.ptr += dims )
                    {
                        double predct_resp, true_resp;

                        if( sample_idx_mask_for_tree->data.ptr[i] ) // the sample is not OOB
                            continue;

                        predct_resp = tree->predict(&sample, &missing, true)->value;
                        true_resp = true_resp_ptr[i];
                        if( data->is_classifier )
                            ncorrect_responses_permuted += cvRound(true_resp - predct_resp) == 0;
                        else
                        {
                            true_resp = (true_resp - predct_resp)/maximal_response;
                            ncorrect_responses_permuted += exp( -true_resp*true_resp );
                        }
                    }
                    var_importance->data.fl[m] += (float)(ncorrect_responses - ncorrect_responses_permuted);
                }
            }
        }
        ntrees++;
        if( term_crit.type != CV_TERMCRIT_ITER && oob_error < max_oob_err )
            break;
    }

    if( var_importance )
    {
        for ( int vi = 0; vi < var_importance->cols; vi++ )
            var_importance->data.fl[vi] = ( var_importance->data.fl[vi] > 0 ) ?
                var_importance->data.fl[vi] : 0;
        cvNormalize( var_importance, var_importance, 1., 0, CV_L1 );
    }

    cvFree( &oob_samples_perm_ptr );
    cvFree( &samples_ptr );
    cvFree( &missing_ptr );
    cvFree( &true_resp_ptr );

    cvReleaseMat( &sample_idx_mask_for_tree );
    cvReleaseMat( &sample_idx_for_tree );
    cvReleaseMat( &oob_sample_votes );
    cvReleaseMat( &oob_responses );

    return true;
}
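Note how the training loop couples the two termination controls: `term_crit.max_iter` caps the number of trees, while `term_crit.epsilon` acts as an out-of-bag error target that stops training early whenever the criteria type is not pure `CV_TERMCRIT_ITER`. A minimal sketch of driving this through the public `CvRTrees::train` API follows; all parameter values are illustrative, not recommendations.

#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>

// Train a forest that stops at 100 trees OR when the out-of-bag error
// falls below 1%, whichever happens first. grow_forest() receives these
// values as term_crit.max_iter and term_crit.epsilon respectively.
void trainWithOobStopping(const cv::Mat& samples, const cv::Mat& responses, const cv::Mat& var_type)
{
    CvRTParams params(10,      // max tree depth
                      10,      // min sample count per node
                      0,       // regression accuracy (unused for classification)
                      false,   // no surrogate splits
                      15,      // max categories
                      0,       // uniform priors
                      true,    // compute variable importance (enables the OOB/permutation pass above)
                      0,       // nactive_vars: 0 = sqrt(total feature count)
                      100,     // max number of trees        -> term_crit.max_iter
                      0.01f,   // OOB error target           -> term_crit.epsilon
                      CV_TERMCRIT_ITER | CV_TERMCRIT_EPS);   // allow early stopping on OOB error

    CvRTrees forest;
    forest.train(samples, CV_ROW_SAMPLE, responses,
                 cv::Mat(), cv::Mat(), var_type, cv::Mat(), params);
}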
int main(int argc, char** argv)
{
    cv::Mat training_data, training_labels, testing_data, testing_labels;

    training_data   = read_rgbd_data_cv(argv[1], NUMBER_OF_TRAINING_SAMPLES);
    training_labels = read_rgbd_data_cv(argv[2], NUMBER_OF_TRAINING_SAMPLES);
    testing_data    = read_rgbd_data_cv(argv[3], NUMBER_OF_TESTING_SAMPLES);
    testing_labels  = read_rgbd_data_cv(argv[4], NUMBER_OF_TESTING_SAMPLES);

    printf("dataset specs: %d samples with %d features\n", training_data.rows, training_data.cols);

    // define all the attributes as numerical
    // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL)
    // that can be assigned on a per-attribute basis
    cv::Mat var_type = cv::Mat(training_data.cols + 1, 1, CV_8U);
    var_type.setTo(cv::Scalar(CV_VAR_NUMERICAL));                   // all inputs are numerical
    var_type.at<uchar>(training_data.cols, 0) = CV_VAR_CATEGORICAL; // the labels are categorical

    /*************** Step 1: define and initialize the Random Trees parameters ***************/
    float priors[] = {1,1,1,1,1}; // prior class weights (uniform here)
    CvRTParams params = CvRTParams(25,              // max depth
                                   50,              // min sample count
                                   0,               // regression accuracy: N/A here
                                   false,           // compute surrogate split, no missing data
                                   15,              // max number of categories (use sub-optimal algorithm for larger numbers)
                                   priors,          // the array of priors
                                   false,           // calculate variable importance
                                   20,              // number of variables randomly selected at each node to find the best split(s)
                                   NUMBER_OF_TREES, // max number of trees in the forest
                                   0.01f,           // forest accuracy (OOB error target)
                                   CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination criteria
                                   );

    /*************** Step 2: train the Random Decision Forest (RDF) classifier ***************/
    CvRTrees* rtree = new CvRTrees;
    rtree->train(training_data, CV_ROW_SAMPLE, training_labels,
                 cv::Mat(), cv::Mat(), var_type, cv::Mat(), params);

    // perform classifier testing and report results
    cv::Mat test_sample, train_sample;
    int correct_class = 0;
    int wrong_class = 0;
    int result;
    int label;
    int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0};
    int false_negatives [NUMBER_OF_CLASSES] = {0,0,0,0,0};

    // route every training sample through one tree (index 1) and record the leaf it lands in
    // (std::vector instead of a variable-length array, which is non-standard C++)
    std::vector<CvDTreeNode*> leaf_nodes(training_data.rows);
    for (int tsample = 0; tsample < training_data.rows; tsample++)
    {
        train_sample = training_data.row(tsample);
        CvForestTree* tree = rtree->get_tree(1);
        CvDTreeNode* leaf_node = tree->predict(train_sample, cv::Mat());
        leaf_nodes[tsample] = leaf_node;
    }

    for (int tsample = 0; tsample < testing_data.rows; tsample++)
    {
        // extract a row from the testing matrix
        test_sample = testing_data.row(tsample);

        /*************** Step 3: predict ***************/
        result = (int) rtree->predict(test_sample, cv::Mat());
        label  = (int) testing_labels.at<float>(tsample, 0);

        printf("Testing Sample %i -> class result (digit %d) - label (digit %d)\n", tsample, result, label);

        // get the leaf nodes of the first tree in the forest
        /*CvForestTree* tree = rtree->get_tree(0);
        std::list<const CvDTreeNode*> leaf_list;
        leaf_list = get_leaf_node( tree );
        printf("Number of Leaf nodes: %ld\n", leaf_list.size());*/

        // if the prediction and the (true) testing classification are the same
        // (N.B. OpenCV uses a floating-point decision tree implementation!)
        if (fabs(result - label) >= FLT_EPSILON)
        {
            // if they differ more than floating point error => wrong class
            wrong_class++;
            false_positives[result]++;
            false_negatives[(int) testing_labels.at<float>(tsample, 0)]++;
        }
        else
        {
            // otherwise correct
            correct_class++;
        }
    }

    printf("\tCorrect classification: %d (%g%%)\n"
           "\tWrong classifications: %d (%g%%)\n",
           correct_class, (double) correct_class*100/testing_data.rows,
           wrong_class, (double) wrong_class*100/testing_data.rows);

    for (int i = 0; i < NUMBER_OF_CLASSES; i++)
    {
        printf("\tClass (digit %d) false positives %d (%g%%)\n\t false negatives %d (%g%%)\n",
               i,
               false_positives[i], (double) false_positives[i]*100/testing_data.rows,
               false_negatives[i], (double) false_negatives[i]*100/testing_data.rows);
    }

    // get all the leaf nodes in the forest
    for (int i = 0; i < NUMBER_OF_TREES; i++)
    {
        CvForestTree* tree = rtree->get_tree(i);
        std::list<const CvDTreeNode*> leaf_list;
        leaf_list = get_leaf_node( tree );
    }

    // get training-sample indices for each leaf node
    std::list<leaf_samples> node_indices;
    for (int i = 0; i < training_data.rows; i++)
    {
        CvDTreeNode* leaf_node = leaf_nodes[i];
        if (leaf_node != NULL)
        {
            leaf_samples leaf_sample;
            leaf_sample.leaf = leaf_node;
            leaf_sample.indices.push_front(i);

            printf("\nValue of leaf: %f\n", leaf_node->value);
            printf("Sample indices for leaf:\n");
            printf(" %d", i);
            // group all later samples that ended up in the same leaf
            for (int j = i+1; j < training_data.rows; j++)
            {
                if (leaf_node == leaf_nodes[j])
                {
                    leaf_sample.indices.push_front(j);
                    printf(" %d", j);
                    leaf_nodes[j] = NULL;
                }
            }
            node_indices.push_front(leaf_sample);
        }
    }
    printf("\nSize of node_indices: %zu\n", node_indices.size());

    // copy the first testing row into a std::vector
    // (the data matrix is CV_32F - labels are read with at<float> above - so read it as float)
    const float* p = testing_data.ptr<float>(0);
    std::vector<float> vec(p, p + testing_data.cols);

    // all matrix memory is freed by destructors
    // all OK: main returns 0

    // result = rtree->predict(testing_data.row(79), cv::Mat());
    // float andi = result - testing_labels.at<float>(79, 0);
    // std::cout << training_labels.row(0).col(0) << std::endl;
    // std::cout << andi << std::endl;

    return 0;
}
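The `main` above relies on a `get_leaf_node` helper and a `leaf_samples` struct that are not part of this listing. A plausible sketch follows, assuming `get_leaf_node` simply walks a tree from `get_root()` and collects nodes without children (a `CvDTreeNode` with null `left`/`right` is a leaf in the old OpenCV ml API); the actual project code may differ.

#include <list>
#include <opencv2/ml/ml.hpp>

// Pairs a leaf node with the indices of the training samples routed to it,
// matching the usage in main() above.
struct leaf_samples
{
    CvDTreeNode* leaf;
    std::list<int> indices;
};

// Recursively collect every leaf of a tree: a node with no children is a leaf.
static void collect_leaves(const CvDTreeNode* node, std::list<const CvDTreeNode*>& leaves)
{
    if (!node)
        return;
    if (!node->left && !node->right)
        leaves.push_back(node);
    collect_leaves(node->left, leaves);
    collect_leaves(node->right, leaves);
}

std::list<const CvDTreeNode*> get_leaf_node(CvForestTree* tree)
{
    std::list<const CvDTreeNode*> leaves;
    collect_leaves(tree->get_root(), leaves);
    return leaves;
}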