示例#1
0
文件: ocr.cpp 项目: somacar/somacar
bool OCRTess::detectAndRecog() {
    UMat grey = UMat::zeros(this->img.rows + 2, this->img.cols + 2, CV_8UC1);
    cvtColor(this->img.clone(), grey, COLOR_RGB2GRAY);

    vector<UMat> channels;
    channels.clear();
    channels.push_back(grey);
    Mat m = 255 - grey.getMat(ACCESS_READ | ACCESS_WRITE);
    channels.push_back(m.getUMat(ACCESS_READ));

    vector<vector<ERStat>> regions(2);
    regions[0].clear();
    regions[1].clear();

    switch (this->REGION) {
        case REG_CSER: {
            parallel_for_(Range(0, (int) channels.size()), Parallel_extractCSER(channels, regions, this->erf1, this->erf2));
            break;
        }
        case REG_MSER: {
            vector<vector<Point> > contours;
            vector<Rect> bboxes;
            Ptr<MSER> mser = MSER::create(21, (int) (0.00002 * grey.cols * grey.rows), (int) (0.05 * grey.cols * grey.rows), 1, 0.7);
            mser->detectRegions(grey, contours, bboxes);
            if (contours.size() > 0)
                MSERsToERStats(grey, contours, regions);
            break;
        }
        default: {
            break;
        }
    }

    /*Text Recognition (OCR)*/
    vector<vector<Vec2i> > nm_region_groups;
    vector<Rect> nm_boxes;
    switch (this->GROUP) {
        case 0:
            erGrouping(this->img, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_HORIZ);
            break;
        case 1:
        default:
            erGrouping(this->img, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_ANY, DIR + TR_GRP, 0.5);
            break;
    }

    if (!nm_boxes.size() || nm_boxes.size() > 1) return false;

    vector<string> words_detection;
    float min_confidence1 = 51.f, min_confidence2 = 60.f;

    vector<UMat> detections;
    for (int i = 0; i < (int) nm_boxes.size(); i++) {
//        rectangle(this->out, nm_boxes[i].tl(), nm_boxes[i].br(), Scalar(255, 255, 0), 3);
        UMat group_img = UMat::zeros(this->img.rows + 2, this->img.cols + 2, CV_8UC1);
        er_draw(channels, regions, nm_region_groups[i], group_img);
        group_img = group_img(nm_boxes[i]);
        copyMakeBorder(group_img.clone(), group_img, 15, 15, 15, 15, BORDER_CONSTANT, Scalar(0));
        detections.push_back(group_img);
    }
    vector<string> outputs((int) detections.size());
    vector<vector<Rect> > boxes((int) detections.size());
    vector<vector<string> > words((int) detections.size());
    vector<vector<float> > confidences((int) detections.size());

    if (!detections.size() || detections.size() > 1) return false;

    for (int i = 0; i < (int) detections.size(); i = i + this->num) {
        Range r;
        if (i + this->num <= (int) detections.size()) r = Range(i, i + this->num);
        else r = Range(i, (int) detections.size());
        parallel_for_(r, Parallel_OCR<OCRTesseract>(detections, outputs, boxes, words, confidences, this->ocrs));
    }

    for (int i = 0; i < (int) detections.size(); i++) {
        outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end());
        if (outputs[i].size() < 3) {
            continue;
        }
        for (int j = 0; j < (int) boxes[i].size(); j++) {
            boxes[i][j].x += nm_boxes[i].x - 15;
            boxes[i][j].y += nm_boxes[i].y - 15;
            if ((words[i][j].size() < 2) || (confidences[i][j] < min_confidence1) ||
                ((words[i][j].size() == 2) && (words[i][j][0] == words[i][j][1])) ||
                ((words[i][j].size() < 4) && (confidences[i][j] < min_confidence2)) ||
                isRepetitive(words[i][j]))
                continue;
            words_detection.push_back(words[i][j]);
//            rectangle(this->out, boxes[i][j].tl(), boxes[i][j].br(), Scalar(255, 0, 255), 3);
//            Size word_size = getTextSize(words[i][j], FONT_HERSHEY_SIMPLEX, (double) scale_font, (int) (3 * scale_font), NULL);
//            rectangle(this->out, boxes[i][j].tl() - Point(3, word_size.height + 3), boxes[i][j].tl() + Point(word_size.width, 0), Scalar(255, 0, 255), -1);
//            putText(this->out, words[i][j], boxes[i][j].tl() - Point(1, 1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255, 255, 255), (int) (3 * scale_font));
        }
    }

    if (!words_detection.size() || words_detection.size() > 1) return false;
    return (words_detection[0].compare(WORD) == 0);
}
示例#2
0
  std::vector<arma::uvec> additiveSeparability(
      OptimisationProblem& optimisationProblem,
      const arma::uword numberOfEvaluations,
      const double minimalConfidence) {
    // Objects like `optimisationProblem` perform all validations by themselves.
    if (numberOfEvaluations < 1) {
      throw std::domain_error("additiveSeparability: The number of evaluations must be greater than 0.");
    } else if (minimalConfidence < 0.0 || minimalConfidence > 1.0) {
      throw std::domain_error("additiveSeparability: The minimal confidence must be within the interval (0, 1].");
    } else if (!isRepresentableAsFloatingPoint(numberOfEvaluations)) {
      throw std::overflow_error("additiveSeparability: The number of elements must be representable as a floating point.");
    }

    if (minimalConfidence <= 0) {
      std::vector<arma::uvec> partition;
      partition.reserve(optimisationProblem.numberOfDimensions_);

      for (arma::uword n = 0; n < optimisationProblem.numberOfDimensions_; ++n) {
        partition.push_back({n});
      }

      return partition;
    }

    /* The first of two steps to analyse the additive separability of a function is to estimate all two-set separations that fulfil the deviation and confidence requirements.
     * A function *f* is additive separable into two other function *g*, *h* if the following holds:
     *
     * f(x, y) - g(x) - h(y) = 0, for all x, y
     *
     * As it is practical impossible to simply guess two separations *g*, *h* of *f*, when we only got a caller to `.getObjectiveValue(...)` and no analytic form, we use a direct consequence from the equation above instead, that must also hold true for additive separable functions and uses only *f*:
     *
     * f(a, c) + f(b, d) - f(a, d) - f(b, c) = 0, for all a, b, c, d
     *
     */

    std::vector<std::pair<arma::uvec, arma::uvec>> partitionCandidates = twoSetsPartitions(optimisationProblem.numberOfDimensions_);

    arma::rowvec confidences(partitionCandidates.size(), arma::fill::zeros);
    for (arma::uword n = 0; n < partitionCandidates.size(); ++n) {
      const std::pair<arma::uvec, arma::uvec>& partitionCandidate = partitionCandidates.at(n);

      for (arma::uword k = 0; k < numberOfEvaluations; ++k) {
        const arma::vec& firstFirstParamter = arma::randu<arma::vec>(optimisationProblem.numberOfDimensions_);
        const arma::vec& secondSecondParameter = arma::randu<arma::vec>(optimisationProblem.numberOfDimensions_);
        arma::vec firstSecondParameter = firstFirstParamter;
        firstSecondParameter.elem(partitionCandidate.first) = secondSecondParameter.elem(partitionCandidate.first);
        arma::vec secondFirstParameter = secondSecondParameter;
        secondFirstParameter.elem(partitionCandidate.first) = firstFirstParamter.elem(partitionCandidate.first);

        // **Note:** The summation of not-a-number values results in a not-a-number value and comparing it with another value returns false, so everything will work out just fine.
        if (std::abs(optimisationProblem.getObjectiveValueOfNormalisedParameter(firstFirstParamter) + optimisationProblem.getObjectiveValueOfNormalisedParameter(secondSecondParameter) - optimisationProblem.getObjectiveValueOfNormalisedParameter(firstSecondParameter) - optimisationProblem.getObjectiveValueOfNormalisedParameter(secondFirstParameter)) < ::mant::machinePrecision) {
          ++confidences(n);
          if (confidences(n) / static_cast<double>(numberOfEvaluations) >= minimalConfidence) {
            // Proceeds with the next partition candidate, as we already reached the confidence threshold.
            break;
          }
        }
      }
    }

    const arma::uvec& acceptablePartitionCandidatesIndicies = arma::find(confidences / static_cast<double>(numberOfEvaluations) >= minimalConfidence);
    std::vector<std::pair<arma::uvec, arma::uvec>> acceptablePartitionCandidates;
    acceptablePartitionCandidates.reserve(acceptablePartitionCandidatesIndicies.n_elem);

    for (const auto acceptablePartitionCandidateIndex : acceptablePartitionCandidatesIndicies) {
      acceptablePartitionCandidates.push_back(partitionCandidates.at(acceptablePartitionCandidateIndex));
    }

    /* The last of the two steps is to calculate the partition with the maximal number of parts, from all acceptable two-set partitions.
     * If we now weaken our observation and assume that each accepted two-set partition holds true for **all** inputs, the partition with the maximal number of parts can be calculated by combining all intersections between one part and an other.
     *
     * For example, assume that we got 3 acceptable two-set partitions:
     *
     * - {{1, 2, 3, 4, 5}, {6}} 
     * - {{1}, {2, 3, 4, 5, 6}}
     * - {{1, 2, 3}, {4, 5, 6}}
     *
     * We would then calculate the intersection between the first two partitions:
     *
     * {{1, 2, 3, 4, 5}, {6}} intersect {{1}, {2, 3, 4, 5, 6}} = 
     *   {{1, 2, 3, 4, 5} intersect {1}}             union
     *   {{1, 2, 3, 4, 5} intersect {2, 3, 4, 5, 6}} union
     *   {{6} intersect {1}}                         union \
     *   {{6} intersect {2, 3, 4, 5, 6}}                   / skipped and directly replaced by {{6}}
     *   = {{1}, {2, 3, 4, 5}, {6}}
     *
     * And intersect the resulting partitions with the remaining one:
     *
     * {{1}, {2, 3, 4, 5}, {6}} intersect {{1, 2, 3}, {4, 5, 6}} = 
     *   {{1} intersect {1, 2, 3}}          union \
     *   {{1} intersect {4, 5, 6}}          union / skipped and directly replaced by {{1}}
     *   {{2, 3, 4, 5} intersect {1, 2, 3}} union
     *   {{2, 3, 4, 5} intersect {4, 5, 6}} union
     *   {{6} intersect {1, 2, 3}}          union \
     *   {{6} intersect {4, 5, 6}}                / skipped and directly replaced by {{6}}
     *   = {{1}, {2, 3}, {4, 5}, {6}}
     *
     * And get {{1}, {2, 3}, {4, 5}, {6}} as partition with the maximal number of parts.
     */

    if (acceptablePartitionCandidates.size() > 1) {
      std::vector<arma::uvec> partition = {acceptablePartitionCandidates.at(0).first, acceptablePartitionCandidates.at(0).second};
      acceptablePartitionCandidates.erase(acceptablePartitionCandidates.cbegin());

      for (const auto& acceptablePartitionCandidate : acceptablePartitionCandidates) {
        std::vector<arma::uvec> nextPartition;
        for (const auto& part : partition) {
          if (part.n_elem == 1) {
            nextPartition.push_back(part);
          } else {
            // **Note:** `std::set_intersection` requires that all parts are ordered at this point.
            std::vector<arma::uword> intersection;
            std::set_intersection(part.begin(), part.end(), acceptablePartitionCandidate.first.begin(), acceptablePartitionCandidate.first.end(), intersection.begin());
            nextPartition.push_back(arma::uvec(intersection));
            intersection.clear();
            std::set_intersection(part.begin(), part.end(), acceptablePartitionCandidate.second.begin(), acceptablePartitionCandidate.second.end(), intersection.begin());
            nextPartition.push_back(arma::uvec(intersection));
          }
        }
        partition = nextPartition;

        // We are already finished, as there is no finer partition as having one part for each dimensions.
        if (partition.size() == optimisationProblem.numberOfDimensions_) {
          break;
        }
      }

      return partition;
    } else if (acceptablePartitionCandidates.size() == 1) {
      return {acceptablePartitionCandidates.at(0).first, acceptablePartitionCandidates.at(0).second};
    } else {
      return {arma::regspace<arma::uvec>(0, optimisationProblem.numberOfDimensions_ - 1)};
    }
  }
int main(int argc, char** argv)
{
    bool do_esf = true;
    bool eval_only_closest_cluster = true;
    std::map<std::string, size_t> class2label;

    std::string test_dir, svm_path, class2label_file;
    std::string out_dir = "/tmp/class_results/";
    v4r::PCLSegmenter<pcl::PointXYZRGB>::Parameter seg_param;

    v4r::GlobalNNClassifier<flann::L1, PointT> esf_classifier;

    KNN_ = 5;

    google::InitGoogleLogging(argv[0]);

    po::options_description desc("Depth-map and point cloud Rendering from mesh file\n======================================\n**Allowed options");
    desc.add_options()
            ("help,h", "produce help message")
            ("mesh_dir,i", po::value<std::string>(&MESH_DIR_)->required(), "root directory containing mesh files (.ply) for each class. Each class is represented by a sub folder with the folder name indicating the class name. Inside these folders, there are .ply files with object models of this class. Each file represents an object identity.")
            ("models_dir,m", po::value<std::string>(&MODELS_DIR_)->required(), "directory containing the object models (will be generated if not exists)")
            ("test_dir,t", po::value<std::string>(&test_dir)->required(), "directory containing *.pcd files for testing")
            ("out_dir,o", po::value<std::string>(&out_dir)->default_value(out_dir), "output directory")
            ("do_esf,e", po::value<bool>(&do_esf)->default_value(do_esf), "if true, includes esf classification (shape based).")
            ("eval_only_closest_cluster", po::value<bool>(&eval_only_closest_cluster)->default_value(eval_only_closest_cluster), "if true, evaluates only the closest segmented cluster with respect to the camera.")
            ("kNN,k", po::value<int>(&KNN_)->default_value(KNN_), "defines the number k of nearest neighbor for classification")
            ("chop_z,z", po::value<double>(&seg_param.chop_at_z_ )->default_value(seg_param.chop_at_z_, boost::str(boost::format("%.2e") % seg_param.chop_at_z_)), "")
            ("seg_type", po::value<int>(&seg_param.seg_type_ )->default_value(seg_param.seg_type_), "")
            ("min_cluster_size", po::value<int>(&seg_param.min_cluster_size_ )->default_value(seg_param.min_cluster_size_), "")
            ("max_vertical_plane_size", po::value<int>(&seg_param.max_vertical_plane_size_ )->default_value(seg_param.max_vertical_plane_size_), "")
            ("num_plane_inliers", po::value<int>(&seg_param.num_plane_inliers_ )->default_value(seg_param.num_plane_inliers_), "")
            ("max_angle_plane_to_ground", po::value<double>(&seg_param.max_angle_plane_to_ground_ )->default_value(seg_param.max_angle_plane_to_ground_), "")
            ("sensor_noise_max", po::value<double>(&seg_param.sensor_noise_max_ )->default_value(seg_param.sensor_noise_max_), "")
            ("table_range_min", po::value<double>(&seg_param.table_range_min_ )->default_value(seg_param.table_range_min_), "")
            ("table_range_max", po::value<double>(&seg_param.table_range_max_ )->default_value(seg_param.table_range_max_), "")
            ("angular_threshold_deg", po::value<double>(&seg_param.angular_threshold_deg_ )->default_value(seg_param.angular_threshold_deg_), "")
            ("trained_svm_model", po::value<std::string>(&svm_path), "")
            ("class2label_file", po::value<std::string>(&class2label_file), "")
   ;
     po::variables_map vm;
     po::store(po::parse_command_line(argc, argv, desc), vm);
     if (vm.count("help"))
     {
         std::cout << desc << std::endl;
         return false;
     }

     try
     {
         po::notify(vm);
     }
     catch(std::exception& e)
     {
         std::cerr << "Error: " << e.what() << std::endl << std::endl << desc << std::endl;
         return false;
     }

    ::svm_model *svm_mod_ = ::svm_load_model(svm_path.c_str());


    std::ifstream f(class2label_file.c_str());
    std::vector<std::string> words;
    std::string line;
    while (std::getline(f, line)) {
        boost::trim_right(line);
        split( words, line, boost::is_any_of("\t "));
        class2label[words[0]] = static_cast<size_t>(atoi(words[1].c_str()));
    }
    f.close();

    if (do_esf)
        init(esf_classifier);

    v4r::PCLSegmenter<pcl::PointXYZRGB> seg(seg_param);

    std::vector< std::string> sub_folders = v4r::io::getFoldersInDirectory( test_dir );
    if( sub_folders.empty() )
        sub_folders.push_back("");


    for (const std::string &sub_folder : sub_folders){
        const std::string sequence_path = test_dir + "/" + sub_folder;
        const std::string out_dir_full = out_dir + "/" +  sub_folder;
        v4r::io::createDirIfNotExist(out_dir_full);

        std::vector< std::string > views = v4r::io::getFilesInDirectory(sequence_path, ".*.pcd", false);
        for ( const std::string &view : views ) {
            const std::string fn = sequence_path + "/" + view;

            std::cout << "Segmenting file " << fn << std::endl;
            pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloud (new pcl::PointCloud<pcl::PointXYZRGB>());
            pcl::io::loadPCDFile(fn, *cloud);
            seg.set_input_cloud(*cloud);

            std::vector<pcl::PointIndices> found_clusters;
            seg.do_segmentation(found_clusters);

            std::vector< std::vector < std::string > > categories (found_clusters.size());
            std::vector< std::vector < float > > confidences (found_clusters.size());

            if (eval_only_closest_cluster) { // only save classification result for cluster which is closest to the camera (w.r.t. to centroid)
                int min_id=-1;
                double min_centroid = std::numeric_limits<double>::max();

                for(size_t i=0; i < found_clusters.size(); i++)
                {
                    typename pcl::PointCloud<PointT>::Ptr clusterXYZ (new pcl::PointCloud<PointT>());
                    pcl::copyPointCloud(*cloud, found_clusters[i], *clusterXYZ);
                    Eigen::Vector4f centroid;
                    pcl::compute3DCentroid (*clusterXYZ, centroid);

        //            double dist = centroid[0]*centroid[0] + centroid[1]*centroid[1] + centroid[2]*centroid[2];

                    if (centroid[2] < min_centroid) {
                        min_centroid = centroid[2];
                        min_id = i;
                    }
                }

                if (min_id >= 0) {
                    std::vector<pcl::PointIndices> closest_cluster;
                    closest_cluster.push_back( found_clusters[min_id] );
                    found_clusters = closest_cluster;
                }
            }

            std::string out_fn = out_dir_full + "/" + view;
            boost::replace_all(out_fn, ".pcd", ".anno_test");
            std::ofstream of (out_fn.c_str());
            for(size_t i=0; i < found_clusters.size(); i++) {
                typename pcl::PointCloud<PointT>::Ptr clusterXYZ (new pcl::PointCloud<PointT>());
                pcl::copyPointCloud(*cloud, found_clusters[i], *clusterXYZ);

                Eigen::Vector4f centroid;
                pcl::compute3DCentroid (*clusterXYZ, centroid);
                std::cout << centroid[2] << " " << centroid[0]*centroid[0] + centroid[1]*centroid[1] + centroid[2]*centroid[2] << std::endl;

                if(do_esf) {
                    esf_classifier.setInputCloud(clusterXYZ);
                    esf_classifier.classify();
                    esf_classifier.getCategory(categories[i]);
                    esf_classifier.getConfidence(confidences[i]);

                    std::cout << "Predicted Label (ESF): " << categories[i][0] << std::endl;
                    of << categories[i][0] << " " << confidences[i][0] << " ";
                    for(size_t c_id=0; c_id<categories[i].size(); c_id++) {
                        std::cout << categories[i][c_id] << "(" << confidences[i][c_id] << ")" << std::endl;
                    }
                }

                of << std::endl;
//                feature_extraction_pipeline<float>(argc, argv);
            }
            of.close();
        }
    }
    return 0;
}