예제 #1
0
int mainPCA(const vector<string> &args, const map<string, string> &opts)
{
    if(args.size() < 4) {
        throw std::runtime_error("ERROR: Incorrect number of arguments. Run command with flag -h for help.");
    }

    double t = (double)getTickCount();

    LOG(INFO) << "Performing a PCA analysis on the images database";

    string dbFName = args[2];
    string pcaFName = args[3];
    string category;

    LOG(INFO) << "Obtaining feature extractor parameters";
    ParametersMap featParams;
    if(opts.count("-c") == 1) {
        category = opts.at("-c");
        featParams = FeatureExtractor::getDefaultParameters("hog");
    } else {
        throw std::runtime_error("ERROR: Incorrect number of arguments. Run command with flag -h for help.");
    }

    if(boost::filesystem::exists(dbFName))
    {
        LOG(INFO) << "Creating the image database";
        PascalImageDatabase db(dbFName.c_str(), category);
        cout << db << endl;

        FeatureExtractor *featExtractor = FeatureExtractor::create(featParams);

        LOG(INFO) << "Category: " << category;

        LOG(INFO) << "Extracting HOG features";
        FeatureCollection features;
        (*featExtractor)(db, features);

        LOG(INFO) << "Performing PCA on the obtained HOG features";
        int num_samples = features.size();
        int num_features = features[0].size();
        Mat data(num_features,num_samples,CV_32FC1,Scalar(0));
        PrincipalComponentAnalysis pca;
        pca.pre_process(features,data);
        pca.compute(data,db);
        pca.savePCAFile(pcaFName);

        t = (double)getTickCount() - t;
        LOG(INFO) << "PCA completed in " << t/getTickFrequency() << " seconds.";

        return EXIT_SUCCESS;
    }
    else
    {
        throw std::runtime_error("ERROR: Pascal database file doesn't exist in: " + dbFName);
    }
}
std::vector<float>
SupportVectorMachine::predict(const FeatureCollection &fset) const
{
    std::vector<float> preds(fset.size());
    for(int i = 0; i < fset.size(); i++) {
        preds[i] = predict(fset[i]);
    }

    return preds;
}
예제 #3
0
int SvmTrain( const ICDAR2011DataSet& dataset, FeatureCollection& features ) {
	for (int j = 0;j < 3;j++) {
		vector<int>false_idx;
		false_idx.clear();
		svm_train(features,dataset, false_idx);
		random_shuffle(false_idx.begin(),false_idx.end());
		for (int i = 0;i<100;i++) {
			int idx = false_idx[i];
			features.push_back(features[idx]);
		}
		random_shuffle(features.begin(),features.end());
	}
}
예제 #4
0
void svm_train(const FeatureCollection& features,const ICDAR2011DataSet& dataset ,vector<int>& false_idx) {
	size_t feature_count = features.size();
	int validation_feature_count = 5000;
    size_t dim = features[0].featureArray.size();
	Mat train_data(feature_count - validation_feature_count, dim, CV_32F);
    Mat train_label(feature_count - validation_feature_count, 1, CV_32F);
	
	for(int feature_idx = 0; feature_idx < feature_count - validation_feature_count; feature_idx++) {
		float sum_norm = 0;
		if (NORM) 
		    sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0);
		else {
		    for (int i = 0;i< dim;i++) 
		        sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i];
		    sum_norm = sqrt(sum_norm);
		}
		for(int dim_idx = 0; dim_idx < dim; dim_idx++) {
			FeatureAtPoint fea = features[feature_idx];
			train_data.at<float>(feature_idx, dim_idx) = features[feature_idx].featureArray[dim_idx]/sum_norm;
		}
		train_label.at<float>(feature_idx) = features[feature_idx].label;
    }
	
	std::cout<<"svm train-------------------------------------"<<std::endl; 
	CvSVM SVM;
	CvBoost boost;  
	CvBoostParams params = CvBoostParams(CvBoost::REAL, 50, 0.95, 5, false, 0 );	
	boost.train(train_data, CV_ROW_SAMPLE,train_label, Mat(), Mat(), Mat(),Mat(),params);
	
	std::cout<<"svm validad-----------------------------------"<<std::endl;
    // Calculate trainning error
	cout<<feature_count<<endl;

	Mat test_data(1, dim, CV_32F);
    float predict_correct = 0, positive_cnt = 0, negative_cnt = 0;
	for(int feature_idx = feature_count - validation_feature_count; feature_idx < feature_count; feature_idx++) {
        float sum_norm = 0 ;
        if (NORM) 
            sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0);
        else {
            for (int i = 0;i< dim;i++) 
                sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i];
            sum_norm = sqrt(sum_norm);
        }
        
        for(int dim_idx = 0; dim_idx < dim; dim_idx++) {
            test_data.at<float>(0, dim_idx) = features[feature_idx].featureArray[dim_idx]/sum_norm;
        }
		
		if(features[feature_idx].label == boost.predict(test_data)) {
			predict_correct++;
		} else {
			false_idx.push_back(feature_idx);
		}
        positive_cnt += features[feature_idx].label == 1 ? 1 : 0;
        negative_cnt += features[feature_idx].label == 0 ? 1 : 0;
    }
	cout << "Training accuracy:" << predict_correct / 5000 << " pos_cnt:" << positive_cnt << " neg_cnt:" << negative_cnt << endl;
	boost.save(((dataset.model_dir + "boost.model").c_str()));
}
예제 #5
0
int SvmTest( const ICDAR2011DataSet& dataset, FeatureCollection& features , const vector<int>&ProposalLen, const vector< vector<Rect> >&Proposal) {
	long sumProposal = accumulate( ProposalLen.begin(), ProposalLen.end(), 0 );
	assert( sumProposal == features.size() );
	
	CvBoost boost;  
	boost.load((dataset.model_dir + "boost.model").c_str());
	
	for( size_t image_idx = 0; image_idx < ProposalLen.size() ;image_idx ++ ) {
		string filename = dataset.test_set[ image_idx ];
		string resultPath = dataset.result_dir + CmFile::GetFileNameWithoutExtension( filename ) + ".txt"; 
		cout<<"result save path "<<resultPath<<" "<<image_idx<<" of "<<dataset.test_num<<endl;

		ofstream score_out( resultPath.c_str() );
		int num = ProposalLen[ image_idx ];
		vector<Rect>proposal = Proposal[ image_idx ];
		assert( num == proposal.size() );

		long StartIndex = 0;
		for (int i = 0;i < num ;i++) {
			FeatureAtPoint featurePoint = features[ i + StartIndex ];
			int dim = featurePoint.featureArray.size();

			Mat test_data(1, dim, CV_32F);
			double sum_norm = 0;
			if (NORM) {
				sum_norm = accumulate(featurePoint.featureArray.begin(),featurePoint.featureArray.end(),0.0);
			} else {
				for (int i =  0;i< dim;i++)  
					sum_norm += featurePoint.featureArray[i] * featurePoint.featureArray[i];
				sum_norm = sqrt(sum_norm);
			}
			vector<float>sample;
			for (int dim_idx = 0;dim_idx < dim;dim_idx++) {
				test_data.at<float>(0, dim_idx) = featurePoint.featureArray[dim_idx]/ sum_norm;
				sample.push_back(featurePoint.featureArray[dim_idx]/sum_norm);
			}
			double score = boost.predict(test_data,Mat(),Range::all(),false,true);
	//		score = score * (-1);
			score_out<< proposal[i].x << " " << proposal[i].y << " "
				<< proposal[i].width << " "  << proposal[i].height << " "
				<< score << endl;
		}                
		StartIndex += num;
		score_out.close();
	}
}
void
SupportVectorMachine::train(const std::vector<float> &labels, const FeatureCollection &fset, const Size &roiSize, double C)
{
    if(labels.size() != fset.size()) throw CError("Database size is different from feature set size!");

    _fVecShape = fset[0].Shape();
    if(roiSize.width != 0 && roiSize.height != 0) {
        _roiSize = roiSize;
    } else {
        _roiSize.width = _fVecShape.width;
        _roiSize.height = _fVecShape.height;
    }

    // Figure out size and number of feature vectors
    int nVecs = labels.size();
    CShape shape = fset[0].Shape();
    int dim = shape.width * shape.height * shape.nBands;

    // Parameters for SVM
    svm_parameter parameter;
    parameter.svm_type = C_SVC;
    parameter.kernel_type = LINEAR;
    parameter.degree = 0;
    parameter.gamma = 0;
    parameter.coef0 = 0;
    parameter.nu = 0.5;
    parameter.cache_size = 100;  // In MB
    parameter.C = C;
    parameter.eps = 1e-3;
    parameter.p = 0.1;
    parameter.shrinking = 1;
    parameter.probability = 0;
    parameter.nr_weight = 0; // ?
    parameter.weight_label = NULL;
    parameter.weight = NULL;
    //cross_validation = 0;

    // Allocate memory
    svm_problem problem;
    problem.l = nVecs;
    problem.y = new double[nVecs];
    problem.x = new svm_node*[nVecs];
    if(_data) delete [] _data;

    /******** BEGIN TODO ********/
    // Copy the data used for training the SVM into the libsvm data structures "problem".
    // Put the feature vectors in _data and labels in problem.y. Also, problem.x[k]
    // should point to the address in _data where the k-th feature vector starts (i.e.,
    // problem.x[k] = &_data[starting index of k-th feature])
    //
    // Hint:
    // * Don't forget to set _data[].index to the corresponding dimension in
    //   the original feature vector. You also need to set _data[].index to -1
    //   right after the last element of each feature vector

    // Vector containing all feature vectors. svm_node is a struct with
    // two fields, index and value. Index entry indicates position
    // in feature vector while value is the value in the original feature vector,
    // each feature vector of size k takes up k+1 svm_node's in _data
    // the last one being simply to indicate that the feature has ended by setting the index
    // entry to -1
    _data = new svm_node[nVecs * (dim + 1)];

	// Position in the feature vector
	int posFeatVec = 0;
	// Counter for elements in _data
	int dataCnt = 0;
	// Loop through each feature vector
	for (int i = 0; i < nVecs; i++)
	{
		// Set labels
		problem.y[i] = labels[i];
		
		// For each vector, loop through feature elements --> (x, y, band)
		for (int y = 0; y < shape.height; y++)
		{
			for (int x = 0; x < shape.width; x++)
			{
				for (int band = 0; band < shape.nBands; band++)
				{
					// 0 ... (dim-1)
					if (posFeatVec < dim)
					{
						_data[dataCnt].index = posFeatVec;
						_data[dataCnt].value = fset[i].Pixel(x, y, band);
						posFeatVec++;
					}
					// dim : end of current feature vector
					// If just set last feature value, set end flag in the same iteration
					if (posFeatVec == dim)
					{
						dataCnt++;
						// Set the index entry to -1
						_data[dataCnt].index = -1;
						_data[dataCnt].value = -1;
						// Position in a new feature vector
						posFeatVec = 0;
					}
					// Ready for next element
					dataCnt++;
				}
				// End of band loop
			}
			// End of x loop
		}
		// End of y loop

		// Position where the i-th feature vector starts
		problem.x[i] = &_data[i * (dim+1)];
	}
	// End of i loop

    /******** END TODO ********/

    // Train the model
    if(_model != NULL) svm_free_and_destroy_model(&_model);
    _model = svm_train(&problem, &parameter);

    // Cleanup
    delete [] problem.y;
    delete [] problem.x;
}