示例#1
0
    void run( Mat& image,
              string& out_sequence,
              vector<Rect>* component_rects,
              vector<string>* component_texts,
              vector<float>* component_confidences,
              int component_level)
    {

        CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
        CV_Assert( (image.cols > 0) && (image.rows > 0) );
        CV_Assert( component_level == OCR_LEVEL_WORD );

        out_sequence.clear();
        if (component_rects != NULL)
            component_rects->clear();
        if (component_texts != NULL)
            component_texts->clear();
        if (component_confidences != NULL)
            component_confidences->clear();

        // First we split a line into words
        vector<Mat> words_mask;
        vector<Rect> words_rect;

        /// Find contours
        vector<vector<Point> > contours;
        vector<Vec4i> hierarchy;
        Mat tmp;
        image.copyTo(tmp);
        findContours( tmp, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point(0, 0) );
        if (contours.size() < 6)
        {
            //do not split lines with less than 6 characters
            words_mask.push_back(image);
            words_rect.push_back(Rect(0,0,image.cols,image.rows));
        }
        else
        {

            Mat_<float> vector_w((int)image.cols,1);
            reduce(image, vector_w, 0, REDUCE_SUM, -1);

            vector<int> spaces;
            vector<int> spaces_start;
            vector<int> spaces_end;
            int space_count=0;
            int last_one_idx;

            int s_init = 0, s_end=vector_w.cols;
            for (int s=0; s<vector_w.cols; s++)
            {
                if (vector_w.at<float>(0,s) == 0)
                    s_init = s+1;
                else
                    break;
            }
            for (int s=vector_w.cols-1; s>=0; s--)
            {
                if (vector_w.at<float>(0,s) == 0)
                    s_end = s;
                else
                    break;
            }

            for (int s=s_init; s<s_end; s++)
            {
                if (vector_w.at<float>(0,s) == 0)
                {
                    space_count++;
                } else {
                    if (space_count!=0)
                    {
                        spaces.push_back(space_count);
                        spaces_start.push_back(last_one_idx);
                        spaces_end.push_back(s-1);
                    }
                    space_count = 0;
                    last_one_idx = s;
                }
            }
            Scalar mean_space,std_space;
            meanStdDev(Mat(spaces),mean_space,std_space);
            int num_word_spaces = 0;
            int last_word_space_end = 0;
            for (int s=0; s<(int)spaces.size(); s++)
            {
                if (spaces_end.at(s)-spaces_start.at(s) > mean_space[0]+(mean_space[0]*1.1)) //this 1.1 is a param?
                {
                    if (num_word_spaces == 0)
                    {
                        //cout << " we have a word from  0  to " << spaces_start.at(s) << endl;
                        Mat word_mask;
                        Rect word_rect = Rect(0,0,spaces_start.at(s),image.rows);
                        image(word_rect).copyTo(word_mask);

                        words_mask.push_back(word_mask);
                        words_rect.push_back(word_rect);
                    }
                    else
                    {
                        //cout << " we have a word from " << last_word_space_end << " to " << spaces_start.at(s) << endl;
                        Mat word_mask;
                        Rect word_rect = Rect(last_word_space_end,0,spaces_start.at(s)-last_word_space_end,image.rows);
                        image(word_rect).copyTo(word_mask);

                        words_mask.push_back(word_mask);
                        words_rect.push_back(word_rect);
                    }
                    num_word_spaces++;
                    last_word_space_end = spaces_end.at(s);
                }
            }
            //cout << " we have a word from " << last_word_space_end << " to " << vector_w.cols << endl << endl << endl;
            Mat word_mask;
            Rect word_rect = Rect(last_word_space_end,0,vector_w.cols-last_word_space_end,image.rows);
            image(word_rect).copyTo(word_mask);

            words_mask.push_back(word_mask);
            words_rect.push_back(word_rect);

        }

        for (int w=0; w<(int)words_mask.size(); w++)
        {

            vector< vector<int> > observations;
            vector< vector<double> > confidences;
            vector<int> obs;
            // First find contours and sort by x coordinate of bbox
            words_mask[w].copyTo(tmp);
            if (tmp.empty())
                continue;
            contours.clear();
            hierarchy.clear();
            /// Find contours
            findContours( tmp, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point(0, 0) );
            vector<Rect> contours_rect;
            for (int i=0; i<(int)contours.size(); i++)
            {
                contours_rect.push_back(boundingRect(contours[i]));
            }

            sort(contours_rect.begin(), contours_rect.end(), sort_rect_horiz);

            // Do character recognition foreach contour
            for (int i=0; i<(int)contours.size(); i++)
            {
                Mat tmp_mask;
                words_mask[w](contours_rect.at(i)).copyTo(tmp_mask);

                vector<int> out_class;
                vector<double> out_conf;
                classifier->eval(tmp_mask,out_class,out_conf);
                if (!out_class.empty())
                    obs.push_back(out_class[0]);
                observations.push_back(out_class);
                confidences.push_back(out_conf);
            }


            //This must be extracted from dictionary, or just assumed to be equal for all characters
            vector<double> start_p(vocabulary.size());
            for (int i=0; i<(int)vocabulary.size(); i++)
                start_p[i] = 1.0/vocabulary.size();


            Mat V = Mat::zeros((int)observations.size(),(int)vocabulary.size(),CV_64FC1);
            vector<string> path(vocabulary.size());

            // Initialize base cases (t == 0)
            for (int i=0; i<(int)vocabulary.size(); i++)
            {
                for (int j=0; j<(int)observations[0].size(); j++)
                {
                    emission_p.at<double>(observations[0][j],obs[0]) = confidences[0][j];
                }
                V.at<double>(0,i) = start_p[i] * emission_p.at<double>(i,obs[0]);
                path[i] = vocabulary.at(i);
            }


            // Run Viterbi for t > 0
            for (int t=1; t<(int)obs.size(); t++)
            {

                //Dude this has to be done each time!!
                emission_p = Mat::eye(62,62,CV_64FC1);
                for (int e=0; e<(int)observations[t].size(); e++)
                {
                    emission_p.at<double>(observations[t][e],obs[t]) = confidences[t][e];
                }

                vector<string> newpath(vocabulary.size());

                for (int i=0; i<(int)vocabulary.size(); i++)
                {
                    double max_prob = 0;
                    int best_idx = 0;
                    for (int j=0; j<(int)vocabulary.size(); j++)
                    {
                        double prob = V.at<double>(t-1,j) * transition_p.at<double>(j,i) * emission_p.at<double>(i,obs[t]);
                        if ( prob > max_prob)
                        {
                            max_prob = prob;
                            best_idx = j;
                        }
                    }

                    V.at<double>(t,i) = max_prob;
                    newpath[i] = path[best_idx] + vocabulary.at(i);
                }

                // Don't need to remember the old paths
                path.swap(newpath);
            }

            double max_prob = 0;
            int best_idx = 0;
            for (int i=0; i<(int)vocabulary.size(); i++)
            {
                double prob = V.at<double>((int)obs.size()-1,i);
                if ( prob > max_prob)
                {
                    max_prob = prob;
                    best_idx = i;
                }
            }

            //cout << path[best_idx] << endl;
            out_sequence = out_sequence+" "+path[best_idx];

            if (component_rects != NULL)
                component_rects->push_back(words_rect[w]);
            if (component_texts != NULL)
                component_texts->push_back(path[best_idx]);
            if (component_confidences != NULL)
                component_confidences->push_back((float)max_prob);

        }

        return;
    }
示例#2
0
string_t
string_from_vector(char* buffer, size_t capacity, const vector_t v) {
	return string_format(buffer, capacity, STRING_CONST("(%.6" PRIreal ", %.6" PRIreal ", %.6" PRIreal ", %.6" PRIreal ")"),
		(real)vector_x(v), (real)vector_y(v), (real)vector_z(v), (real)vector_w(v));
}