Example #1
vector<Mat*> predictRealImages(vector<Mat*> imageVector, vector<CvRTrees*> forestVector1, vector<CvRTrees*> forestVector2, int imNum, int imageWidth, int imageHeight, int tileSizeX, int tileSizeY, int overlap, 
	int numOfTrees,double desicionThres1,double desicionThres2, int numOfPointPairs1, int numOfPointPairs2, int numOfPointPairs3, string charType, string featureType, bool useNoise)
{
	printf("Detecting characters in images....\n\n");
	//CalcRectSample calcRect;
	DWORD start, stop;
	Scalar mean, std;
	int xPos, yPos, predPosx, predPosy, rectFiltNum;
	int overlapTileX = tileSizeX/overlap;
	int overlapTileY = tileSizeY/overlap;

	Mat imRect, imRectReSize, imRectThres, integralRect,thresholdedImage, imRectThresholded;
	Mat featureMat1;
	Mat featureMat2 = Mat::zeros(1,numOfPointPairs2,CV_32FC1);
	Mat featureMat3 = Mat::zeros(1,numOfPointPairs3,CV_32FC1);
	vector<Mat*> predictions;
	Mat* pred;
	int tileNumX = imageWidth/(tileSizeX/overlap) - 2;//imageWidth/charSizeX*overlap - (overlap-1);
	int tileNumY = imageHeight/(tileSizeY/overlap) - 2; //imageHeight/charSizeY*overlap -(overlap-1);

	if(featureType == "rects")
	{
		rectFiltNum = calcRectFiltNum(tileSizeX,tileSizeY)+1;
		featureMat1 = Mat::zeros(1,rectFiltNum,CV_32FC1);
	}
	else if(featureType == "points")
	{
		featureMat1 = Mat::zeros(1,numOfPointPairs1,CV_32FC1);
	}

	Mat treePred;
	CvForestTree* tree;
	double minVal, maxVal;
	int minIndx[2] = {0,0};
	int maxIndx[2] = {0,0};
	int numOfForests = (int)forestVector1.size();
	Mat pointPairVector1 = Mat::zeros(numOfPointPairs1,4,CV_32SC1);
	Mat pointPairVector2 = Mat::zeros(numOfPointPairs2,4,CV_32SC1);
	Mat pointPairVector3 = Mat::zeros(numOfPointPairs3,4,CV_32SC1);

	if(featureType == "points")
	{
		cv::RNG rng(0);
		int distThreshold = 10;
		int x1, x2, y1, y2;
		for(int i=0; i<numOfPointPairs1; i++)
		{
			x1 = 0;
			y1 = 0;
			x2 = 0;
			y2 = 0;

			while(abs(x1-x2) < distThreshold && abs(y1-y2) < distThreshold)
			{
				x1 = rng.uniform(tileSizeX/4,tileSizeX*3/4);
				y1 = rng.uniform(tileSizeY/4,tileSizeY*3/4);
				x2 = rng.uniform(0,tileSizeX);
				y2 = rng.uniform(0,tileSizeY);
			}
			pointPairVector1.at<int>(i,0) = x1;
			pointPairVector1.at<int>(i,1) = y1;
			pointPairVector1.at<int>(i,2) = x2;
			pointPairVector1.at<int>(i,3) = y2;
		}
	}

	RNG rng(0);
	int distThreshold = 10;
	int x1, x2, y1, y2;
	for(int i=0; i<numOfPointPairs2; i++)
	{
		x1 = 0;
		y1 = 0;
		x2 = 0;
		y2 = 0;

		while(abs(x1-x2) < distThreshold && abs(y1-y2) < distThreshold)
		{
			x1 = rng.uniform(tileSizeX/4,tileSizeX*3/4);
			y1 = rng.uniform(tileSizeY/4,tileSizeY*3/4);
			x2 = rng.uniform(0,tileSizeX);
			y2 = rng.uniform(0,tileSizeY);
		}

		pointPairVector2.at<int>(i,0) = x1/(tileSizeX/8);
		pointPairVector2.at<int>(i,1) = y1/(tileSizeY/8);
		pointPairVector2.at<int>(i,2) = x2/(tileSizeX/8);
		pointPairVector2.at<int>(i,3) = y2/(tileSizeY/8);
	}

	for(int i=0; i<numOfPointPairs3; i++)
	{
		x1 = 0;
		y1 = 0;
		x2 = 0;
		y2 = 0;

		while(abs(x1-x2) < distThreshold && abs(y1-y2) < distThreshold)
		{
			x1 = rng.uniform(tileSizeX/4,tileSizeX*3/4);
			y1 = rng.uniform(tileSizeY/4,tileSizeY*3/4);
			x2 = rng.uniform(0,tileSizeX);
			y2 = rng.uniform(0,tileSizeY);
		}

		pointPairVector3.at<int>(i,0) = x1/(tileSizeX/16);
		pointPairVector3.at<int>(i,1) = y1/(tileSizeY/16);
		pointPairVector3.at<int>(i,2) = x2/(tileSizeX/16);
		pointPairVector3.at<int>(i,3) = y2/(tileSizeY/16);
	}
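	// Two-stage detection per tile: the binary forest in forestVector2 first votes on whether
	// a tile contains a character at all (threshold desicionThres2); only accepted tiles are
	// passed to the per-character forests in forestVector1, whose majority vote must exceed
	// desicionThres1 before the winning character code is written into the prediction map.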

	start = GetTickCount();
	for(int im=0; im<imNum; im++)
	{
		//cv::Laplacian(*imageVector[im],laplacianImage,-1,5);
		pred = new Mat(Mat::zeros(tileNumY,tileNumX, CV_8UC1));
		cv::adaptiveThreshold(*imageVector[im],thresholdedImage,255,ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,tileSizeX+1,20);
		xPos = 0;
		yPos = 0;
		predPosx = 0;
		predPosy = 0;

		while(yPos < imageHeight - tileSizeY)
		{
			while(xPos < imageWidth - tileSizeX)
			{

				imRect = (*imageVector[im])(Rect(xPos,yPos,tileSizeX,tileSizeY));
				resize(imRect,imRectReSize,Size(8,8));
				featureMat2 = Mat::zeros(1,numOfPointPairs2,CV_32FC1);
				calcPointPairsFeaturesTile(imRectReSize,featureMat2,pointPairVector2,numOfPointPairs2,0, false);

				imRectThresholded = thresholdedImage(Rect(xPos,yPos,tileSizeX,tileSizeY));
				meanStdDev(imRectThresholded,mean,std);
				/*
				int reSizeTo = 8;
				featureMat2 = Mat::zeros(1,reSizeTo*reSizeTo,CV_32FC1);
				calcStdTile(imRectThresholded,featureMat2,0, reSizeTo);*/

				treePred = Mat::zeros(2,1, CV_32SC1);

			
				for(int t=0; t<forestVector2[0]->get_tree_count(); t++)
				{
					tree = forestVector2[0]->get_tree(t);
					treePred.at<int>(static_cast<int>(tree->predict(featureMat2)->value),0)++;
				}

				if(treePred.at<int>(1,0) > desicionThres2*forestVector2[0]->get_tree_count())
				{
					//imshow("sfsdf",imRectThresholded);
					//waitKey();

						if(featureType == "rects")
							calcRectFeatureTile(imRect,featureMat1,tileSizeX,tileSizeY,0);
						else if(featureType == "points")
						{
							featureMat1 = Mat::zeros(1,numOfPointPairs1,CV_32FC1);
							calcPointPairsFeaturesTile(imRectThresholded,featureMat1,pointPairVector1,numOfPointPairs1,0, useNoise);
						}
						else
							abort();

						//Loop over all trees

						treePred = Mat::zeros(256,1, CV_32SC1);

						for(int f=0; f<numOfForests; f++)
						{
							for(int t=0; t<forestVector1[f]->get_tree_count(); t++)
							{
								tree = forestVector1[f]->get_tree(t);
								treePred.at<int>(static_cast<int>(tree->predict(featureMat1)->value),0)++;
							}
						}
						cv::minMaxIdx(treePred,&minVal,&maxVal,minIndx,maxIndx);
						//cout << (char)(*maxIndx) << "\t" << maxVal/(numOfForests*numOfTrees) << endl;
						if(maxVal > numOfTrees*numOfForests*desicionThres1)
							pred->at<uchar>(predPosy,predPosx) = *maxIndx;
					
				}

				predPosx++;
				xPos += overlapTileX;
			}
			xPos = 0;
			yPos += overlapTileY;
			predPosx = 0;
			predPosy++;
		}
		removeFalsePredictions(*pred);
		predictions.push_back(pred);
	}
	stop = GetTickCount();
	std::cout << "Average time per image: " << (float)(stop - start)/((float)imNum)/1000 << std::endl << std::endl; 
	return predictions;
}
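The per-tile character decision above is simply a majority vote over the individual trees of a CvRTrees forest. Below is a minimal, self-contained sketch of that voting pattern, assuming the legacy OpenCV 2.x ML API (CvRTrees, CvForestTree); the synthetic two-class data, the 0.6 threshold, and all names are illustrative and not part of the original code.

#include <cstdio>
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>

// Sketch: train a forest on synthetic two-class data, then reproduce the per-tree
// voting used in predictRealImages(): every CvForestTree casts one vote and the
// winning class is accepted only if it exceeds a vote-fraction threshold.
int main()
{
    cv::RNG rng(42);
    cv::Mat samples(200, 2, CV_32FC1), labels(200, 1, CV_32FC1);
    for (int i = 0; i < samples.rows; i++)
    {
        int cls = i % 2;                                   // two well-separated classes
        samples.at<float>(i, 0) = (float)rng.gaussian(1.0) + 4.0f * cls;
        samples.at<float>(i, 1) = (float)rng.gaussian(1.0) + 4.0f * cls;
        labels.at<float>(i, 0) = (float)cls;
    }

    // mark the response as categorical so the forest is trained as a classifier
    cv::Mat varType(samples.cols + 1, 1, CV_8U, cv::Scalar(CV_VAR_NUMERICAL));
    varType.at<uchar>(samples.cols, 0) = CV_VAR_CATEGORICAL;

    CvRTrees forest;
    forest.train(samples, CV_ROW_SAMPLE, labels, cv::Mat(), cv::Mat(), varType,
                 cv::Mat(), CvRTParams());

    cv::Mat query = (cv::Mat_<float>(1, 2) << 4.0f, 4.0f);
    cv::Mat votes = cv::Mat::zeros(2, 1, CV_32SC1);
    for (int t = 0; t < forest.get_tree_count(); t++)
    {
        CvForestTree* tree = forest.get_tree(t);
        votes.at<int>((int)tree->predict(query)->value, 0)++;   // one vote per tree
    }

    double decisionThres = 0.6;                            // illustrative threshold
    double minVal, maxVal;
    int minIdx[2] = {0, 0}, maxIdx[2] = {0, 0};
    cv::minMaxIdx(votes, &minVal, &maxVal, minIdx, maxIdx);
    if (maxVal > decisionThres * forest.get_tree_count())
        printf("predicted class %d with %d of %d votes\n",
               maxIdx[0], (int)maxVal, forest.get_tree_count());
    else
        printf("no confident prediction\n");
    return 0;
}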
Example #2
bool CvRTrees::grow_forest( const CvTermCriteria term_crit )
{
    CvMat* sample_idx_mask_for_tree = 0;
    CvMat* sample_idx_for_tree      = 0;

    const int max_ntrees = term_crit.max_iter;
    const double max_oob_err = term_crit.epsilon;

    const int dims = data->var_count;
    float maximal_response = 0;

    CvMat* oob_sample_votes	   = 0;
    CvMat* oob_responses       = 0;

    float* oob_samples_perm_ptr= 0;

    float* samples_ptr     = 0;
    uchar* missing_ptr     = 0;
    float* true_resp_ptr   = 0;
    bool is_oob_or_vimportance = (max_oob_err > 0 && term_crit.type != CV_TERMCRIT_ITER) || var_importance;

    // oob_predictions_sum[i] = sum of predicted values for the i-th sample
    // oob_num_of_predictions[i] = number of summands
    //                            (number of predictions for the i-th sample)
    // initialize these variables to avoid warning C4701
    CvMat oob_predictions_sum = cvMat( 1, 1, CV_32FC1 );
    CvMat oob_num_of_predictions = cvMat( 1, 1, CV_32FC1 );
     
    nsamples = data->sample_count;
    nclasses = data->get_num_classes();

    if ( is_oob_or_vimportance )
    {
        if( data->is_classifier )
        {
            oob_sample_votes = cvCreateMat( nsamples, nclasses, CV_32SC1 );
            cvZero(oob_sample_votes);
        }
        else
        {
            // oob_responses[0,i] = oob_predictions_sum[i]
            //    = sum of predicted values for the i-th sample
            // oob_responses[1,i] = oob_num_of_predictions[i]
            //    = number of summands (number of predictions for the i-th sample)
            oob_responses = cvCreateMat( 2, nsamples, CV_32FC1 );
            cvZero(oob_responses);
            cvGetRow( oob_responses, &oob_predictions_sum, 0 );
            cvGetRow( oob_responses, &oob_num_of_predictions, 1 );
        }
        
        oob_samples_perm_ptr     = (float*)cvAlloc( sizeof(float)*nsamples*dims );
        samples_ptr              = (float*)cvAlloc( sizeof(float)*nsamples*dims );
        missing_ptr              = (uchar*)cvAlloc( sizeof(uchar)*nsamples*dims );
        true_resp_ptr            = (float*)cvAlloc( sizeof(float)*nsamples );            

        data->get_vectors( 0, samples_ptr, missing_ptr, true_resp_ptr );
        
        double minval, maxval;
        CvMat responses = cvMat(1, nsamples, CV_32FC1, true_resp_ptr);
        cvMinMaxLoc( &responses, &minval, &maxval );
        maximal_response = (float)MAX( MAX( fabs(minval), fabs(maxval) ), 0 );
    }

    trees = (CvForestTree**)cvAlloc( sizeof(trees[0])*max_ntrees );
    memset( trees, 0, sizeof(trees[0])*max_ntrees );

    sample_idx_mask_for_tree = cvCreateMat( 1, nsamples, CV_8UC1 );
    sample_idx_for_tree      = cvCreateMat( 1, nsamples, CV_32SC1 );

    ntrees = 0;
    while( ntrees < max_ntrees )
    {
        int i, oob_samples_count = 0;
        double ncorrect_responses = 0; // used for estimation of variable importance
        CvForestTree* tree = 0;

        cvZero( sample_idx_mask_for_tree );
        for(i = 0; i < nsamples; i++ ) // draw the bootstrap sample used to train one tree
        {
            int idx = cvRandInt( &rng ) % nsamples;
            sample_idx_for_tree->data.i[i] = idx;
            sample_idx_mask_for_tree->data.ptr[idx] = 0xFF;
        }

        trees[ntrees] = new CvForestTree();
        tree = trees[ntrees];
        tree->train( data, sample_idx_for_tree, this );

        if ( is_oob_or_vimportance )
        {
            CvMat sample, missing;
            // form array of OOB samples indices and get these samples
            sample   = cvMat( 1, dims, CV_32FC1, samples_ptr );
            missing  = cvMat( 1, dims, CV_8UC1,  missing_ptr );

            oob_error = 0;
            for( i = 0; i < nsamples; i++,
                sample.data.fl += dims, missing.data.ptr += dims )
            {
                CvDTreeNode* predicted_node = 0;
                // check if the sample is OOB
                if( sample_idx_mask_for_tree->data.ptr[i] )
                    continue;

                // predict oob samples
                if( !predicted_node )
                    predicted_node = tree->predict(&sample, &missing, true);

                if( !data->is_classifier ) //regression
                {
                    double avg_resp, resp = predicted_node->value;
                    oob_predictions_sum.data.fl[i] += (float)resp;
                    oob_num_of_predictions.data.fl[i] += 1;

                    // compute oob error
                    avg_resp = oob_predictions_sum.data.fl[i]/oob_num_of_predictions.data.fl[i];
                    avg_resp -= true_resp_ptr[i];
                    oob_error += avg_resp*avg_resp;
                    resp = (resp - true_resp_ptr[i])/maximal_response;
                    ncorrect_responses += exp( -resp*resp );
                }
                else //classification
                {
                    double prdct_resp;
                    CvPoint max_loc;
                    CvMat votes;

                    cvGetRow(oob_sample_votes, &votes, i);
                    votes.data.i[predicted_node->class_idx]++;

                    // compute oob error
                    cvMinMaxLoc( &votes, 0, 0, 0, &max_loc );

                    prdct_resp = data->cat_map->data.i[max_loc.x];
                    oob_error += (fabs(prdct_resp - true_resp_ptr[i]) < FLT_EPSILON) ? 0 : 1;

                    ncorrect_responses += cvRound(predicted_node->value - true_resp_ptr[i]) == 0;
                }
                oob_samples_count++;
            }
            if( oob_samples_count > 0 )
                oob_error /= (double)oob_samples_count;

            // estimate variable importance
            if( var_importance && oob_samples_count > 0 )
            {
                int m;

                memcpy( oob_samples_perm_ptr, samples_ptr, dims*nsamples*sizeof(float));
                for( m = 0; m < dims; m++ )
                {
                    double ncorrect_responses_permuted = 0;
                    // randomly permute values of the m-th variable in the oob samples
                    float* mth_var_ptr = oob_samples_perm_ptr + m;

                    for( i = 0; i < nsamples; i++ )
                    {
                        int i1, i2;
                        float temp;

                        if( sample_idx_mask_for_tree->data.ptr[i] ) //the sample is not OOB
                            continue;
                        i1 = cvRandInt( &rng ) % nsamples;
                        i2 = cvRandInt( &rng ) % nsamples;
                        CV_SWAP( mth_var_ptr[i1*dims], mth_var_ptr[i2*dims], temp );

                        // restore the values of the (m-1)-th variable that were
                        // permuted at the previous iteration
                        if( m > 1 )
                            oob_samples_perm_ptr[i*dims+m-1] = samples_ptr[i*dims+m-1];
                    }

                    // predict "permuted" cases and calculate the number of votes for the
                    // correct class in the variable-m-permuted oob data
                    sample  = cvMat( 1, dims, CV_32FC1, oob_samples_perm_ptr );
                    missing = cvMat( 1, dims, CV_8UC1, missing_ptr );
                    for( i = 0; i < nsamples; i++,
                        sample.data.fl += dims, missing.data.ptr += dims )
                    {
                        double predct_resp, true_resp;

                        if( sample_idx_mask_for_tree->data.ptr[i] ) //the sample is not OOB
                            continue;

                        predct_resp = tree->predict(&sample, &missing, true)->value;
                        true_resp   = true_resp_ptr[i];
                        if( data->is_classifier )
                            ncorrect_responses_permuted += cvRound(true_resp - predct_resp) == 0;
                        else
                        {
                            true_resp = (true_resp - predct_resp)/maximal_response;
                            ncorrect_responses_permuted += exp( -true_resp*true_resp );
                        }
                    }
                    var_importance->data.fl[m] += (float)(ncorrect_responses
                        - ncorrect_responses_permuted);
                }
            }
        }
        ntrees++;
        if( term_crit.type != CV_TERMCRIT_ITER && oob_error < max_oob_err )
            break;
    }

    if( var_importance )
    {
        for ( int vi = 0; vi < var_importance->cols; vi++ )
                var_importance->data.fl[vi] = ( var_importance->data.fl[vi] > 0 ) ?
                    var_importance->data.fl[vi] : 0;
        cvNormalize( var_importance, var_importance, 1., 0, CV_L1 );
    }

    cvFree( &oob_samples_perm_ptr );
    cvFree( &samples_ptr );
    cvFree( &missing_ptr );
    cvFree( &true_resp_ptr );
    
    cvReleaseMat( &sample_idx_mask_for_tree );
    cvReleaseMat( &sample_idx_for_tree );

    cvReleaseMat( &oob_sample_votes );
    cvReleaseMat( &oob_responses );

    return true;
}
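grow_forest() keeps adding trees until term_crit.max_iter trees have been built, or stops early once the out-of-bag error falls below term_crit.epsilon when the criteria type includes CV_TERMCRIT_EPS. Those fields come straight from the CvRTParams constructor, as in the sketch below (a hypothetical helper with illustrative values, assuming the OpenCV 2.x API).

#include <opencv2/ml/ml.hpp>

// Illustrative only: the last three CvRTParams arguments become the CvTermCriteria
// that grow_forest() receives (max_iter = max trees, epsilon = target OOB error).
CvRTParams makeForestParams()
{
    return CvRTParams(25,      // max tree depth
                      5,       // min sample count per node
                      0,       // regression accuracy (unused for classification)
                      false,   // no surrogate splits
                      15,      // max categories
                      0,       // no class priors
                      true,    // compute variable importance
                      4,       // active variables considered per split
                      100,     // max number of trees   -> term_crit.max_iter
                      0.01f,   // target OOB error      -> term_crit.epsilon
                      CV_TERMCRIT_ITER | CV_TERMCRIT_EPS); // -> term_crit.type
}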
Example #3
File: test_rgbd.cpp Project: far-ad/GP-RF
int main(int argc, char** argv)
{
  // std::cout<<FLT_EPSILON<<std::endl; 
  cv::Mat training_data, training_labels,testing_data, testing_labels;
  
  training_data = read_rgbd_data_cv(argv[1],NUMBER_OF_TRAINING_SAMPLES);
  training_labels = read_rgbd_data_cv(argv[2], NUMBER_OF_TRAINING_SAMPLES);
  testing_data = read_rgbd_data_cv(argv[3],NUMBER_OF_TESTING_SAMPLES);
  testing_labels = read_rgbd_data_cv(argv[4], NUMBER_OF_TESTING_SAMPLES);
  
 
  printf("dataset specs: %d samples with %d features\n", training_data.rows, training_data.cols);

  // define all the attributes as numerical
  // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL)
  // that can be assigned on a per attribute basis

  cv::Mat var_type = cv::Mat(training_data.cols + 1, 1, CV_8U );
  var_type.setTo(cv::Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical
  var_type.at<uchar>(training_data.cols, 0) = CV_VAR_CATEGORICAL; // the labels are categorical

  /******************************** Step 1: define and initialize the Random Trees parameters ******************************/
  float priors[] = {1,1,1,1,1};  // prior weights for each class (all equal here)
  CvRTParams params = CvRTParams(25, // max depth
				 50, // min sample count
				 0, // regression accuracy: N/A here
				 false, // compute surrogate split, no missing data
				 15, // max number of categories (use sub-optimal algorithm for larger numbers)
				 priors, // the array of priors
				 false,  // calculate variable importance
				 20,       // number of variables randomly selected at node and used to find the best split(s).
				 NUMBER_OF_TREES,	 // max number of trees in the forest
				 0.01f,				// forest accuracy
				 CV_TERMCRIT_ITER |	CV_TERMCRIT_EPS // termination criteria
				 );
  
  /**************************** Step 2: train the Random Decision Forest (RDF) classifier *********************/
  // printf( "\nUsing training database: %s\n\n", argv[1]);
  CvRTrees* rtree = new CvRTrees;
  rtree->train(training_data, CV_ROW_SAMPLE, training_labels,
	       cv::Mat(), cv::Mat(), var_type, cv::Mat(), params);
  
  // perform classifier testing and report results
  cv::Mat test_sample, train_sample;
  int correct_class = 0;
  int wrong_class = 0;
  int result;
  int label;
  int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0};
  int false_negatives [NUMBER_OF_CLASSES] = {0,0,0,0,0};

  std::vector<CvDTreeNode*> leaf_nodes(training_data.rows); // std::vector instead of a non-standard variable-length array

  // map each training sample to the leaf node it reaches in one tree of the forest
  for (int tsample = 0; tsample < training_data.rows; tsample++)
    {
      train_sample = training_data.row(tsample);
      CvForestTree* tree = rtree->get_tree(1);
      CvDTreeNode* leaf_node = tree->predict(train_sample, cv::Mat());
      leaf_nodes[tsample] = leaf_node; 
    }

  // printf( "\nUsing testing database: %s\n\n", argv[2]);

  for (int tsample = 0; tsample < testing_data.rows; tsample++)
    {	       
      // extract a row from the testing matrix
      test_sample = testing_data.row(tsample);
      // train on the testing data:
      // test_sample = training_data.row(tsample);
      /******************************** Step 3: predict *********************************************/

      result = (int) rtree->predict(test_sample, cv::Mat());
      label = (int) testing_labels.at<float>(tsample, 0);

      printf("Testing Sample %i -> class result (digit %d) - label (digit %d)\n", tsample, result, label);

      // get the leaf nodes of the first tree in the forest
      /*CvForestTree* tree = rtree->get_tree(0);
      std::list<const CvDTreeNode*> leaf_list;
      leaf_list = get_leaf_node( tree );
      printf("Number of Leaf nodes: %ld\n", leaf_list.size());*/

      // if the prediction and the (true) testing classification are the same
      // (N.B. openCV uses a floating point decision tree implementation!)
      if (fabs(result - label) >= FLT_EPSILON)
	{
	  // if they differ more than floating point error => wrong class
	  wrong_class++;
	  false_positives[(int) result]++;
	  false_negatives[(int) testing_labels.at<float>(tsample, 0)]++;
	}
      else
	{
	  // otherwise correct
	  correct_class++;
	}
    }

  printf( // "\nResults on the testing database: %s\n"
	 "\tCorrect classification: %d (%g%%)\n"
	 "\tWrong classifications: %d (%g%%)\n",
	 // argv[2],
	 correct_class, (double) correct_class*100/testing_data.rows,
	 wrong_class, (double) wrong_class*100/testing_data.rows);

  for (int i = 0; i < NUMBER_OF_CLASSES; i++)
    {
      printf( "\tClass (digit %d) false postives 	%d (%g%%)\n\t                false negatives  %d (%g%%)\n", i,
	      false_positives[i],
	      (double) false_positives[i]*100/testing_data.rows,
	      false_negatives[i],
	      (double) false_negatives[i]*100/testing_data.rows);
    }

	// get all the leaf nodes in the forest
   for (int i = 0; i < NUMBER_OF_TREES; i ++)
	{ 
      	CvForestTree* tree = rtree->get_tree(i);
      	std::list<const CvDTreeNode*> leaf_list;
      	leaf_list = get_leaf_node( tree );
	}
  	//get training_sample indices for leaf nodes
  std::list<leaf_samples> node_indices;
  for (int i = 0; i < training_data.rows; i++) 
    {
      CvDTreeNode* leaf_node = leaf_nodes[i];

      if (leaf_node != NULL) 
	  {
		leaf_samples leaf_sample;
		leaf_sample.leaf = leaf_node;
		leaf_sample.indices.push_front(i);
		printf("\nValue of leaf: %f\n", leaf_node->value);
		printf("Smaple indices for leaf:\n");
		printf(" %d", i);

		for (int j=i+1; j < training_data.rows; j++) 
	  	{
	    	if (leaf_node == leaf_nodes[j])
			{
	      		leaf_sample.indices.push_front(j);
	      		printf(" %lu", j);
	      		leaf_nodes[j] = NULL;
	    	}
	  	}
		node_indices.push_front(leaf_sample);      
      }
    }
  	printf("\nSize of node_indices: %d\n", node_indices.size()); 
	//get labels and features
	

  // get a pointer to the features of the first testing sample
  // (the data matrix is CV_32F, so read it as float rather than double)
  const float* p = testing_data.ptr<float>(0);
  std::vector<float> vec(p, p + testing_data.cols);

  
  // all matrix memory free by destructors

  // all OK : main returns 0
  // result = rtree->predict(testing_data.row(79), cv::Mat());
  // float andi = result - testing_labels.at<float>(79, 0);
  // // std::cout<<training_labels.row(0).col(0)<<std::endl;
  // std::cout<<andi<<std::endl;
  return 0;
}