bool OCRTess::detectAndRecog() { UMat grey = UMat::zeros(this->img.rows + 2, this->img.cols + 2, CV_8UC1); cvtColor(this->img.clone(), grey, COLOR_RGB2GRAY); vector<UMat> channels; channels.clear(); channels.push_back(grey); Mat m = 255 - grey.getMat(ACCESS_READ | ACCESS_WRITE); channels.push_back(m.getUMat(ACCESS_READ)); vector<vector<ERStat>> regions(2); regions[0].clear(); regions[1].clear(); switch (this->REGION) { case REG_CSER: { parallel_for_(Range(0, (int) channels.size()), Parallel_extractCSER(channels, regions, this->erf1, this->erf2)); break; } case REG_MSER: { vector<vector<Point> > contours; vector<Rect> bboxes; Ptr<MSER> mser = MSER::create(21, (int) (0.00002 * grey.cols * grey.rows), (int) (0.05 * grey.cols * grey.rows), 1, 0.7); mser->detectRegions(grey, contours, bboxes); if (contours.size() > 0) MSERsToERStats(grey, contours, regions); break; } default: { break; } } /*Text Recognition (OCR)*/ vector<vector<Vec2i> > nm_region_groups; vector<Rect> nm_boxes; switch (this->GROUP) { case 0: erGrouping(this->img, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_HORIZ); break; case 1: default: erGrouping(this->img, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_ANY, DIR + TR_GRP, 0.5); break; } if (!nm_boxes.size() || nm_boxes.size() > 1) return false; vector<string> words_detection; float min_confidence1 = 51.f, min_confidence2 = 60.f; vector<UMat> detections; for (int i = 0; i < (int) nm_boxes.size(); i++) { // rectangle(this->out, nm_boxes[i].tl(), nm_boxes[i].br(), Scalar(255, 255, 0), 3); UMat group_img = UMat::zeros(this->img.rows + 2, this->img.cols + 2, CV_8UC1); er_draw(channels, regions, nm_region_groups[i], group_img); group_img = group_img(nm_boxes[i]); copyMakeBorder(group_img.clone(), group_img, 15, 15, 15, 15, BORDER_CONSTANT, Scalar(0)); detections.push_back(group_img); } vector<string> outputs((int) detections.size()); vector<vector<Rect> > boxes((int) detections.size()); vector<vector<string> > words((int) detections.size()); vector<vector<float> > confidences((int) detections.size()); if (!detections.size() || detections.size() > 1) return false; for (int i = 0; i < (int) detections.size(); i = i + this->num) { Range r; if (i + this->num <= (int) detections.size()) r = Range(i, i + this->num); else r = Range(i, (int) detections.size()); parallel_for_(r, Parallel_OCR<OCRTesseract>(detections, outputs, boxes, words, confidences, this->ocrs)); } for (int i = 0; i < (int) detections.size(); i++) { outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end()); if (outputs[i].size() < 3) { continue; } for (int j = 0; j < (int) boxes[i].size(); j++) { boxes[i][j].x += nm_boxes[i].x - 15; boxes[i][j].y += nm_boxes[i].y - 15; if ((words[i][j].size() < 2) || (confidences[i][j] < min_confidence1) || ((words[i][j].size() == 2) && (words[i][j][0] == words[i][j][1])) || ((words[i][j].size() < 4) && (confidences[i][j] < min_confidence2)) || isRepetitive(words[i][j])) continue; words_detection.push_back(words[i][j]); // rectangle(this->out, boxes[i][j].tl(), boxes[i][j].br(), Scalar(255, 0, 255), 3); // Size word_size = getTextSize(words[i][j], FONT_HERSHEY_SIMPLEX, (double) scale_font, (int) (3 * scale_font), NULL); // rectangle(this->out, boxes[i][j].tl() - Point(3, word_size.height + 3), boxes[i][j].tl() + Point(word_size.width, 0), Scalar(255, 0, 255), -1); // putText(this->out, words[i][j], boxes[i][j].tl() - Point(1, 1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255, 255, 255), (int) (3 * scale_font)); } } if (!words_detection.size() || words_detection.size() > 1) return false; return (words_detection[0].compare(WORD) == 0); }
std::vector<arma::uvec> additiveSeparability( OptimisationProblem& optimisationProblem, const arma::uword numberOfEvaluations, const double minimalConfidence) { // Objects like `optimisationProblem` perform all validations by themselves. if (numberOfEvaluations < 1) { throw std::domain_error("additiveSeparability: The number of evaluations must be greater than 0."); } else if (minimalConfidence < 0.0 || minimalConfidence > 1.0) { throw std::domain_error("additiveSeparability: The minimal confidence must be within the interval (0, 1]."); } else if (!isRepresentableAsFloatingPoint(numberOfEvaluations)) { throw std::overflow_error("additiveSeparability: The number of elements must be representable as a floating point."); } if (minimalConfidence <= 0) { std::vector<arma::uvec> partition; partition.reserve(optimisationProblem.numberOfDimensions_); for (arma::uword n = 0; n < optimisationProblem.numberOfDimensions_; ++n) { partition.push_back({n}); } return partition; } /* The first of two steps to analyse the additive separability of a function is to estimate all two-set separations that fulfil the deviation and confidence requirements. * A function *f* is additive separable into two other function *g*, *h* if the following holds: * * f(x, y) - g(x) - h(y) = 0, for all x, y * * As it is practical impossible to simply guess two separations *g*, *h* of *f*, when we only got a caller to `.getObjectiveValue(...)` and no analytic form, we use a direct consequence from the equation above instead, that must also hold true for additive separable functions and uses only *f*: * * f(a, c) + f(b, d) - f(a, d) - f(b, c) = 0, for all a, b, c, d * */ std::vector<std::pair<arma::uvec, arma::uvec>> partitionCandidates = twoSetsPartitions(optimisationProblem.numberOfDimensions_); arma::rowvec confidences(partitionCandidates.size(), arma::fill::zeros); for (arma::uword n = 0; n < partitionCandidates.size(); ++n) { const std::pair<arma::uvec, arma::uvec>& partitionCandidate = partitionCandidates.at(n); for (arma::uword k = 0; k < numberOfEvaluations; ++k) { const arma::vec& firstFirstParamter = arma::randu<arma::vec>(optimisationProblem.numberOfDimensions_); const arma::vec& secondSecondParameter = arma::randu<arma::vec>(optimisationProblem.numberOfDimensions_); arma::vec firstSecondParameter = firstFirstParamter; firstSecondParameter.elem(partitionCandidate.first) = secondSecondParameter.elem(partitionCandidate.first); arma::vec secondFirstParameter = secondSecondParameter; secondFirstParameter.elem(partitionCandidate.first) = firstFirstParamter.elem(partitionCandidate.first); // **Note:** The summation of not-a-number values results in a not-a-number value and comparing it with another value returns false, so everything will work out just fine. if (std::abs(optimisationProblem.getObjectiveValueOfNormalisedParameter(firstFirstParamter) + optimisationProblem.getObjectiveValueOfNormalisedParameter(secondSecondParameter) - optimisationProblem.getObjectiveValueOfNormalisedParameter(firstSecondParameter) - optimisationProblem.getObjectiveValueOfNormalisedParameter(secondFirstParameter)) < ::mant::machinePrecision) { ++confidences(n); if (confidences(n) / static_cast<double>(numberOfEvaluations) >= minimalConfidence) { // Proceeds with the next partition candidate, as we already reached the confidence threshold. break; } } } } const arma::uvec& acceptablePartitionCandidatesIndicies = arma::find(confidences / static_cast<double>(numberOfEvaluations) >= minimalConfidence); std::vector<std::pair<arma::uvec, arma::uvec>> acceptablePartitionCandidates; acceptablePartitionCandidates.reserve(acceptablePartitionCandidatesIndicies.n_elem); for (const auto acceptablePartitionCandidateIndex : acceptablePartitionCandidatesIndicies) { acceptablePartitionCandidates.push_back(partitionCandidates.at(acceptablePartitionCandidateIndex)); } /* The last of the two steps is to calculate the partition with the maximal number of parts, from all acceptable two-set partitions. * If we now weaken our observation and assume that each accepted two-set partition holds true for **all** inputs, the partition with the maximal number of parts can be calculated by combining all intersections between one part and an other. * * For example, assume that we got 3 acceptable two-set partitions: * * - {{1, 2, 3, 4, 5}, {6}} * - {{1}, {2, 3, 4, 5, 6}} * - {{1, 2, 3}, {4, 5, 6}} * * We would then calculate the intersection between the first two partitions: * * {{1, 2, 3, 4, 5}, {6}} intersect {{1}, {2, 3, 4, 5, 6}} = * {{1, 2, 3, 4, 5} intersect {1}} union * {{1, 2, 3, 4, 5} intersect {2, 3, 4, 5, 6}} union * {{6} intersect {1}} union \ * {{6} intersect {2, 3, 4, 5, 6}} / skipped and directly replaced by {{6}} * = {{1}, {2, 3, 4, 5}, {6}} * * And intersect the resulting partitions with the remaining one: * * {{1}, {2, 3, 4, 5}, {6}} intersect {{1, 2, 3}, {4, 5, 6}} = * {{1} intersect {1, 2, 3}} union \ * {{1} intersect {4, 5, 6}} union / skipped and directly replaced by {{1}} * {{2, 3, 4, 5} intersect {1, 2, 3}} union * {{2, 3, 4, 5} intersect {4, 5, 6}} union * {{6} intersect {1, 2, 3}} union \ * {{6} intersect {4, 5, 6}} / skipped and directly replaced by {{6}} * = {{1}, {2, 3}, {4, 5}, {6}} * * And get {{1}, {2, 3}, {4, 5}, {6}} as partition with the maximal number of parts. */ if (acceptablePartitionCandidates.size() > 1) { std::vector<arma::uvec> partition = {acceptablePartitionCandidates.at(0).first, acceptablePartitionCandidates.at(0).second}; acceptablePartitionCandidates.erase(acceptablePartitionCandidates.cbegin()); for (const auto& acceptablePartitionCandidate : acceptablePartitionCandidates) { std::vector<arma::uvec> nextPartition; for (const auto& part : partition) { if (part.n_elem == 1) { nextPartition.push_back(part); } else { // **Note:** `std::set_intersection` requires that all parts are ordered at this point. std::vector<arma::uword> intersection; std::set_intersection(part.begin(), part.end(), acceptablePartitionCandidate.first.begin(), acceptablePartitionCandidate.first.end(), intersection.begin()); nextPartition.push_back(arma::uvec(intersection)); intersection.clear(); std::set_intersection(part.begin(), part.end(), acceptablePartitionCandidate.second.begin(), acceptablePartitionCandidate.second.end(), intersection.begin()); nextPartition.push_back(arma::uvec(intersection)); } } partition = nextPartition; // We are already finished, as there is no finer partition as having one part for each dimensions. if (partition.size() == optimisationProblem.numberOfDimensions_) { break; } } return partition; } else if (acceptablePartitionCandidates.size() == 1) { return {acceptablePartitionCandidates.at(0).first, acceptablePartitionCandidates.at(0).second}; } else { return {arma::regspace<arma::uvec>(0, optimisationProblem.numberOfDimensions_ - 1)}; } }
int main(int argc, char** argv) { bool do_esf = true; bool eval_only_closest_cluster = true; std::map<std::string, size_t> class2label; std::string test_dir, svm_path, class2label_file; std::string out_dir = "/tmp/class_results/"; v4r::PCLSegmenter<pcl::PointXYZRGB>::Parameter seg_param; v4r::GlobalNNClassifier<flann::L1, PointT> esf_classifier; KNN_ = 5; google::InitGoogleLogging(argv[0]); po::options_description desc("Depth-map and point cloud Rendering from mesh file\n======================================\n**Allowed options"); desc.add_options() ("help,h", "produce help message") ("mesh_dir,i", po::value<std::string>(&MESH_DIR_)->required(), "root directory containing mesh files (.ply) for each class. Each class is represented by a sub folder with the folder name indicating the class name. Inside these folders, there are .ply files with object models of this class. Each file represents an object identity.") ("models_dir,m", po::value<std::string>(&MODELS_DIR_)->required(), "directory containing the object models (will be generated if not exists)") ("test_dir,t", po::value<std::string>(&test_dir)->required(), "directory containing *.pcd files for testing") ("out_dir,o", po::value<std::string>(&out_dir)->default_value(out_dir), "output directory") ("do_esf,e", po::value<bool>(&do_esf)->default_value(do_esf), "if true, includes esf classification (shape based).") ("eval_only_closest_cluster", po::value<bool>(&eval_only_closest_cluster)->default_value(eval_only_closest_cluster), "if true, evaluates only the closest segmented cluster with respect to the camera.") ("kNN,k", po::value<int>(&KNN_)->default_value(KNN_), "defines the number k of nearest neighbor for classification") ("chop_z,z", po::value<double>(&seg_param.chop_at_z_ )->default_value(seg_param.chop_at_z_, boost::str(boost::format("%.2e") % seg_param.chop_at_z_)), "") ("seg_type", po::value<int>(&seg_param.seg_type_ )->default_value(seg_param.seg_type_), "") ("min_cluster_size", po::value<int>(&seg_param.min_cluster_size_ )->default_value(seg_param.min_cluster_size_), "") ("max_vertical_plane_size", po::value<int>(&seg_param.max_vertical_plane_size_ )->default_value(seg_param.max_vertical_plane_size_), "") ("num_plane_inliers", po::value<int>(&seg_param.num_plane_inliers_ )->default_value(seg_param.num_plane_inliers_), "") ("max_angle_plane_to_ground", po::value<double>(&seg_param.max_angle_plane_to_ground_ )->default_value(seg_param.max_angle_plane_to_ground_), "") ("sensor_noise_max", po::value<double>(&seg_param.sensor_noise_max_ )->default_value(seg_param.sensor_noise_max_), "") ("table_range_min", po::value<double>(&seg_param.table_range_min_ )->default_value(seg_param.table_range_min_), "") ("table_range_max", po::value<double>(&seg_param.table_range_max_ )->default_value(seg_param.table_range_max_), "") ("angular_threshold_deg", po::value<double>(&seg_param.angular_threshold_deg_ )->default_value(seg_param.angular_threshold_deg_), "") ("trained_svm_model", po::value<std::string>(&svm_path), "") ("class2label_file", po::value<std::string>(&class2label_file), "") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); if (vm.count("help")) { std::cout << desc << std::endl; return false; } try { po::notify(vm); } catch(std::exception& e) { std::cerr << "Error: " << e.what() << std::endl << std::endl << desc << std::endl; return false; } ::svm_model *svm_mod_ = ::svm_load_model(svm_path.c_str()); std::ifstream f(class2label_file.c_str()); std::vector<std::string> words; std::string line; while (std::getline(f, line)) { boost::trim_right(line); split( words, line, boost::is_any_of("\t ")); class2label[words[0]] = static_cast<size_t>(atoi(words[1].c_str())); } f.close(); if (do_esf) init(esf_classifier); v4r::PCLSegmenter<pcl::PointXYZRGB> seg(seg_param); std::vector< std::string> sub_folders = v4r::io::getFoldersInDirectory( test_dir ); if( sub_folders.empty() ) sub_folders.push_back(""); for (const std::string &sub_folder : sub_folders){ const std::string sequence_path = test_dir + "/" + sub_folder; const std::string out_dir_full = out_dir + "/" + sub_folder; v4r::io::createDirIfNotExist(out_dir_full); std::vector< std::string > views = v4r::io::getFilesInDirectory(sequence_path, ".*.pcd", false); for ( const std::string &view : views ) { const std::string fn = sequence_path + "/" + view; std::cout << "Segmenting file " << fn << std::endl; pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloud (new pcl::PointCloud<pcl::PointXYZRGB>()); pcl::io::loadPCDFile(fn, *cloud); seg.set_input_cloud(*cloud); std::vector<pcl::PointIndices> found_clusters; seg.do_segmentation(found_clusters); std::vector< std::vector < std::string > > categories (found_clusters.size()); std::vector< std::vector < float > > confidences (found_clusters.size()); if (eval_only_closest_cluster) { // only save classification result for cluster which is closest to the camera (w.r.t. to centroid) int min_id=-1; double min_centroid = std::numeric_limits<double>::max(); for(size_t i=0; i < found_clusters.size(); i++) { typename pcl::PointCloud<PointT>::Ptr clusterXYZ (new pcl::PointCloud<PointT>()); pcl::copyPointCloud(*cloud, found_clusters[i], *clusterXYZ); Eigen::Vector4f centroid; pcl::compute3DCentroid (*clusterXYZ, centroid); // double dist = centroid[0]*centroid[0] + centroid[1]*centroid[1] + centroid[2]*centroid[2]; if (centroid[2] < min_centroid) { min_centroid = centroid[2]; min_id = i; } } if (min_id >= 0) { std::vector<pcl::PointIndices> closest_cluster; closest_cluster.push_back( found_clusters[min_id] ); found_clusters = closest_cluster; } } std::string out_fn = out_dir_full + "/" + view; boost::replace_all(out_fn, ".pcd", ".anno_test"); std::ofstream of (out_fn.c_str()); for(size_t i=0; i < found_clusters.size(); i++) { typename pcl::PointCloud<PointT>::Ptr clusterXYZ (new pcl::PointCloud<PointT>()); pcl::copyPointCloud(*cloud, found_clusters[i], *clusterXYZ); Eigen::Vector4f centroid; pcl::compute3DCentroid (*clusterXYZ, centroid); std::cout << centroid[2] << " " << centroid[0]*centroid[0] + centroid[1]*centroid[1] + centroid[2]*centroid[2] << std::endl; if(do_esf) { esf_classifier.setInputCloud(clusterXYZ); esf_classifier.classify(); esf_classifier.getCategory(categories[i]); esf_classifier.getConfidence(confidences[i]); std::cout << "Predicted Label (ESF): " << categories[i][0] << std::endl; of << categories[i][0] << " " << confidences[i][0] << " "; for(size_t c_id=0; c_id<categories[i].size(); c_id++) { std::cout << categories[i][c_id] << "(" << confidences[i][c_id] << ")" << std::endl; } } of << std::endl; // feature_extraction_pipeline<float>(argc, argv); } of.close(); } } return 0; }