int mainPCA(const vector<string> &args, const map<string, string> &opts) { if(args.size() < 4) { throw std::runtime_error("ERROR: Incorrect number of arguments. Run command with flag -h for help."); } double t = (double)getTickCount(); LOG(INFO) << "Performing a PCA analysis on the images database"; string dbFName = args[2]; string pcaFName = args[3]; string category; LOG(INFO) << "Obtaining feature extractor parameters"; ParametersMap featParams; if(opts.count("-c") == 1) { category = opts.at("-c"); featParams = FeatureExtractor::getDefaultParameters("hog"); } else { throw std::runtime_error("ERROR: Incorrect number of arguments. Run command with flag -h for help."); } if(boost::filesystem::exists(dbFName)) { LOG(INFO) << "Creating the image database"; PascalImageDatabase db(dbFName.c_str(), category); cout << db << endl; FeatureExtractor *featExtractor = FeatureExtractor::create(featParams); LOG(INFO) << "Category: " << category; LOG(INFO) << "Extracting HOG features"; FeatureCollection features; (*featExtractor)(db, features); LOG(INFO) << "Performing PCA on the obtained HOG features"; int num_samples = features.size(); int num_features = features[0].size(); Mat data(num_features,num_samples,CV_32FC1,Scalar(0)); PrincipalComponentAnalysis pca; pca.pre_process(features,data); pca.compute(data,db); pca.savePCAFile(pcaFName); t = (double)getTickCount() - t; LOG(INFO) << "PCA completed in " << t/getTickFrequency() << " seconds."; return EXIT_SUCCESS; } else { throw std::runtime_error("ERROR: Pascal database file doesn't exist in: " + dbFName); } }
std::vector<float> SupportVectorMachine::predict(const FeatureCollection &fset) const { std::vector<float> preds(fset.size()); for(int i = 0; i < fset.size(); i++) { preds[i] = predict(fset[i]); } return preds; }
int SvmTrain( const ICDAR2011DataSet& dataset, FeatureCollection& features ) { for (int j = 0;j < 3;j++) { vector<int>false_idx; false_idx.clear(); svm_train(features,dataset, false_idx); random_shuffle(false_idx.begin(),false_idx.end()); for (int i = 0;i<100;i++) { int idx = false_idx[i]; features.push_back(features[idx]); } random_shuffle(features.begin(),features.end()); } }
void svm_train(const FeatureCollection& features,const ICDAR2011DataSet& dataset ,vector<int>& false_idx) { size_t feature_count = features.size(); int validation_feature_count = 5000; size_t dim = features[0].featureArray.size(); Mat train_data(feature_count - validation_feature_count, dim, CV_32F); Mat train_label(feature_count - validation_feature_count, 1, CV_32F); for(int feature_idx = 0; feature_idx < feature_count - validation_feature_count; feature_idx++) { float sum_norm = 0; if (NORM) sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0); else { for (int i = 0;i< dim;i++) sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i]; sum_norm = sqrt(sum_norm); } for(int dim_idx = 0; dim_idx < dim; dim_idx++) { FeatureAtPoint fea = features[feature_idx]; train_data.at<float>(feature_idx, dim_idx) = features[feature_idx].featureArray[dim_idx]/sum_norm; } train_label.at<float>(feature_idx) = features[feature_idx].label; } std::cout<<"svm train-------------------------------------"<<std::endl; CvSVM SVM; CvBoost boost; CvBoostParams params = CvBoostParams(CvBoost::REAL, 50, 0.95, 5, false, 0 ); boost.train(train_data, CV_ROW_SAMPLE,train_label, Mat(), Mat(), Mat(),Mat(),params); std::cout<<"svm validad-----------------------------------"<<std::endl; // Calculate trainning error cout<<feature_count<<endl; Mat test_data(1, dim, CV_32F); float predict_correct = 0, positive_cnt = 0, negative_cnt = 0; for(int feature_idx = feature_count - validation_feature_count; feature_idx < feature_count; feature_idx++) { float sum_norm = 0 ; if (NORM) sum_norm = accumulate(features[feature_idx].featureArray.begin(),features[feature_idx].featureArray.end(),0.0); else { for (int i = 0;i< dim;i++) sum_norm += features[feature_idx].featureArray[i] * features[feature_idx].featureArray[i]; sum_norm = sqrt(sum_norm); } for(int dim_idx = 0; dim_idx < dim; dim_idx++) { test_data.at<float>(0, dim_idx) = features[feature_idx].featureArray[dim_idx]/sum_norm; } if(features[feature_idx].label == boost.predict(test_data)) { predict_correct++; } else { false_idx.push_back(feature_idx); } positive_cnt += features[feature_idx].label == 1 ? 1 : 0; negative_cnt += features[feature_idx].label == 0 ? 1 : 0; } cout << "Training accuracy:" << predict_correct / 5000 << " pos_cnt:" << positive_cnt << " neg_cnt:" << negative_cnt << endl; boost.save(((dataset.model_dir + "boost.model").c_str())); }
int SvmTest( const ICDAR2011DataSet& dataset, FeatureCollection& features , const vector<int>&ProposalLen, const vector< vector<Rect> >&Proposal) { long sumProposal = accumulate( ProposalLen.begin(), ProposalLen.end(), 0 ); assert( sumProposal == features.size() ); CvBoost boost; boost.load((dataset.model_dir + "boost.model").c_str()); for( size_t image_idx = 0; image_idx < ProposalLen.size() ;image_idx ++ ) { string filename = dataset.test_set[ image_idx ]; string resultPath = dataset.result_dir + CmFile::GetFileNameWithoutExtension( filename ) + ".txt"; cout<<"result save path "<<resultPath<<" "<<image_idx<<" of "<<dataset.test_num<<endl; ofstream score_out( resultPath.c_str() ); int num = ProposalLen[ image_idx ]; vector<Rect>proposal = Proposal[ image_idx ]; assert( num == proposal.size() ); long StartIndex = 0; for (int i = 0;i < num ;i++) { FeatureAtPoint featurePoint = features[ i + StartIndex ]; int dim = featurePoint.featureArray.size(); Mat test_data(1, dim, CV_32F); double sum_norm = 0; if (NORM) { sum_norm = accumulate(featurePoint.featureArray.begin(),featurePoint.featureArray.end(),0.0); } else { for (int i = 0;i< dim;i++) sum_norm += featurePoint.featureArray[i] * featurePoint.featureArray[i]; sum_norm = sqrt(sum_norm); } vector<float>sample; for (int dim_idx = 0;dim_idx < dim;dim_idx++) { test_data.at<float>(0, dim_idx) = featurePoint.featureArray[dim_idx]/ sum_norm; sample.push_back(featurePoint.featureArray[dim_idx]/sum_norm); } double score = boost.predict(test_data,Mat(),Range::all(),false,true); // score = score * (-1); score_out<< proposal[i].x << " " << proposal[i].y << " " << proposal[i].width << " " << proposal[i].height << " " << score << endl; } StartIndex += num; score_out.close(); } }
void SupportVectorMachine::train(const std::vector<float> &labels, const FeatureCollection &fset, const Size &roiSize, double C) { if(labels.size() != fset.size()) throw CError("Database size is different from feature set size!"); _fVecShape = fset[0].Shape(); if(roiSize.width != 0 && roiSize.height != 0) { _roiSize = roiSize; } else { _roiSize.width = _fVecShape.width; _roiSize.height = _fVecShape.height; } // Figure out size and number of feature vectors int nVecs = labels.size(); CShape shape = fset[0].Shape(); int dim = shape.width * shape.height * shape.nBands; // Parameters for SVM svm_parameter parameter; parameter.svm_type = C_SVC; parameter.kernel_type = LINEAR; parameter.degree = 0; parameter.gamma = 0; parameter.coef0 = 0; parameter.nu = 0.5; parameter.cache_size = 100; // In MB parameter.C = C; parameter.eps = 1e-3; parameter.p = 0.1; parameter.shrinking = 1; parameter.probability = 0; parameter.nr_weight = 0; // ? parameter.weight_label = NULL; parameter.weight = NULL; //cross_validation = 0; // Allocate memory svm_problem problem; problem.l = nVecs; problem.y = new double[nVecs]; problem.x = new svm_node*[nVecs]; if(_data) delete [] _data; /******** BEGIN TODO ********/ // Copy the data used for training the SVM into the libsvm data structures "problem". // Put the feature vectors in _data and labels in problem.y. Also, problem.x[k] // should point to the address in _data where the k-th feature vector starts (i.e., // problem.x[k] = &_data[starting index of k-th feature]) // // Hint: // * Don't forget to set _data[].index to the corresponding dimension in // the original feature vector. You also need to set _data[].index to -1 // right after the last element of each feature vector // Vector containing all feature vectors. svm_node is a struct with // two fields, index and value. Index entry indicates position // in feature vector while value is the value in the original feature vector, // each feature vector of size k takes up k+1 svm_node's in _data // the last one being simply to indicate that the feature has ended by setting the index // entry to -1 _data = new svm_node[nVecs * (dim + 1)]; // Position in the feature vector int posFeatVec = 0; // Counter for elements in _data int dataCnt = 0; // Loop through each feature vector for (int i = 0; i < nVecs; i++) { // Set labels problem.y[i] = labels[i]; // For each vector, loop through feature elements --> (x, y, band) for (int y = 0; y < shape.height; y++) { for (int x = 0; x < shape.width; x++) { for (int band = 0; band < shape.nBands; band++) { // 0 ... (dim-1) if (posFeatVec < dim) { _data[dataCnt].index = posFeatVec; _data[dataCnt].value = fset[i].Pixel(x, y, band); posFeatVec++; } // dim : end of current feature vector // If just set last feature value, set end flag in the same iteration if (posFeatVec == dim) { dataCnt++; // Set the index entry to -1 _data[dataCnt].index = -1; _data[dataCnt].value = -1; // Position in a new feature vector posFeatVec = 0; } // Ready for next element dataCnt++; } // End of band loop } // End of x loop } // End of y loop // Position where the i-th feature vector starts problem.x[i] = &_data[i * (dim+1)]; } // End of i loop /******** END TODO ********/ // Train the model if(_model != NULL) svm_free_and_destroy_model(&_model); _model = svm_train(&problem, ¶meter); // Cleanup delete [] problem.y; delete [] problem.x; }