/**
 * Runs OCR on an image and returns the recognized text as UTF-8.
 *
 * The engine object `ocr` is declared outside this function (presumably a
 * tesseract::TessBaseAPI -- confirm at its declaration) and is re-initialized
 * for `language` on every call.
 *
 * @param input    Image to recognize. The pixel buffer is handed to the engine
 *                 with 1 byte per pixel, so a single-channel 8-bit image is
 *                 expected (NOTE(review): not validated here).
 * @param language Language code forwarded to the engine's Init().
 * @return Recognized text, or an empty string when initialization fails or
 *         the engine yields no text.
 */
std::string identifyText(cv::Mat input, std::string language)
{
    // Init() reports failure with a non-zero code; recognizing with an
    // uninitialized engine is not meaningful, so bail out early.
    if (ocr.Init(NULL, language.c_str(), tesseract::OEM_TESSERACT_ONLY) != 0)
        return std::string();

    //std::string whitelist = "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.:";
    //ocr.SetVariable("tessedit_char_whitelist", whitelist.c_str());
    ocr.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
    ocr.SetImage(input.data, input.cols, input.rows, 1, input.step);

    // GetUTF8Text() hands back a heap buffer owned by the caller; it may be
    // NULL and must be released with delete[] once copied.
    char* rawText = ocr.GetUTF8Text();
    if (rawText == NULL)
        return std::string();

    std::string text(rawText);
    delete[] rawText;

    return text;
}
Пример #2
0
/**
 * Recognizes text inside the current region of interest of the current frame.
 *
 * The ROI is cut out of `img`, converted to grayscale, binarized with a fixed
 * threshold and passed to the member OCR engine `tess`. The binarized crop is
 * also shown in the "control" window.
 *
 * @param outtext Caller-provided buffer receiving the NUL-terminated result.
 *                WARNING: the copy is unbounded; the caller must supply a
 *                buffer large enough for any recognized text. The buffer is
 *                left untouched when the engine returns no text.
 * @return Length in bytes of the text written to `outtext`, or 0 when the
 *         engine returned no text.
 */
int SubsExtractor::ocr(char *outtext)
{
	Mat crop,crop2;
	Mat(img, ROI).copyTo(crop);

	cvtColor(crop, crop2, CV_BGR2GRAY);
	//adaptiveThreshold(crop2,crop,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,CV_THRESH_BINARY,3,5);
	// Pixels brighter than th1 become th2, everything else becomes 0.
	int th1 = 200;
	int th2 = 255;
	threshold(crop2,crop,th1,th2,THRESH_BINARY);

	//tesseract::TessBaseAPI tess;
	// One byte per pixel; the row stride is given as crop.cols, i.e. the
	// binarized crop is treated as tightly packed.
	tess.SetImage((uchar *)crop.data,crop.cols,crop.rows,1,crop.cols);
	imshow("control", crop);

	// The engine returns a heap buffer owned by the caller; release it with
	// delete[] after copying so repeated calls do not leak.
	char *s = tess.GetUTF8Text();
	if(s == NULL)
		return 0;

	//fprintf(stderr,"OCR: %s\n",s);
	strcpy(outtext,s);
	delete[] s;
	return static_cast<int>(strlen(outtext));
}
Пример #3
0
/**
 * Reads a number from an image with the OCR engine `tess` (declared outside
 * this function) and maps it to a small code.
 *
 * The image is first reduced to a mask with inRange(): with the bounds used
 * here only the third channel is constrained (it must be at least 51).
 * NOTE(review): the original comment says this filters out black, which
 * suggests a 3-channel 8-bit input -- confirm against the caller.
 *
 * @param img Image to recognize; the local header is overwritten by the mask.
 * @return 5..12 for recognized values 10, 20, ..., 80 respectively; -1 for
 *         any other value or when the engine returns no text.
 */
int detectNumber(Mat img){
 //Mat circ = copyContour(img, circle);

 inRange(img, Scalar(0, 0, 51), Scalar(255, 255, 255), img); //filter black
 tess.SetImage((uchar*)img.data, img.size().width, img.size().height, img.channels(), img.step1());
 //tess.SetRectangle(circle[0]-circle[2], circle[1]-circle[2],2*circle[2],2*circle[2]);

 // The engine returns a caller-owned heap buffer that may be NULL; copy what
 // is needed and release it with delete[].
 char* raw = tess.GetUTF8Text();
 if (raw == NULL)
  return -1;

 // Drop all whitespace. The byte is cast to unsigned char first because
 // passing a negative char (e.g. a UTF-8 byte) to isspace() is undefined.
 string out;
 for (const char* p = raw; *p != '\0'; ++p) {
  if (!::isspace(static_cast<unsigned char>(*p)))
   out += *p;
 }
 delete[] raw;

 //printf("%s\n", out.c_str());
 switch(atoi(out.c_str())){
	case 10: return 5;
	case 20: return 6;
	case 30: return 7;
	case 40: return 8;
	case 50: return 9;
	case 60: return 10;
	case 70: return 11;
	case 80: return 12;
 }
 //rectangle(img, Point(circle[0]-circle[2], circle[1]-circle[2]), Point(circle[0]+circle[2], circle[1]+circle[2]), Scalar (100,255,255),3,8,0);
 // imwrite("./test.png", img);
 return -1;}
Пример #4
0
    /**
     * Recognizes text in an image and optionally reports per-component details.
     *
     * @param image                 Input image; must be CV_8UC1 or CV_8UC3 (asserted).
     * @param output                Receives the full recognized text (empty when
     *                              the engine yields none).
     * @param component_rects       Optional; receives one bounding box per component.
     * @param component_texts       Optional; receives the text of each component.
     * @param component_confidences Optional; receives the confidence of each component.
     * @param component_level       OCR_LEVEL_TEXTLINE selects text lines; any other
     *                              value selects words.
     *
     * All supplied output vectors are cleared before being filled. When built
     * without HAVE_TESSERACT a diagnostic is printed and every output is left
     * empty.
     */
    void run(Mat& image, string& output, vector<Rect>* component_rects=NULL,
             vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
             int component_level=0)
    {

        CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );

#ifdef HAVE_TESSERACT

        if (component_texts != 0)
            component_texts->clear();
        if (component_rects != 0)
            component_rects->clear();
        if (component_confidences != 0)
            component_confidences->clear();

        tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1());
        tess.Recognize(0);

        // GetUTF8Text() returns a caller-owned buffer that may be NULL;
        // constructing a string from NULL is undefined, so guard it.
        char *outText = tess.GetUTF8Text();
        if (outText != NULL)
        {
            output = string(outText);
            delete [] outText;
        }
        else
        {
            output.clear();
        }

        if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) )
        {
            tesseract::ResultIterator* ri = tess.GetIterator();
            tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
            if (component_level == OCR_LEVEL_TEXTLINE)
                level = tesseract::RIL_TEXTLINE;

            if (ri != 0) {
                do {
                    // Each element's text is a separate caller-owned buffer.
                    const char* word = ri->GetUTF8Text(level);
                    if (word == NULL)
                        continue; // jumps to ri->Next(level), skipping this element
                    float conf = ri->Confidence(level);
                    int x1, y1, x2, y2;
                    ri->BoundingBox(level, &x1, &y1, &x2, &y2);

                    if (component_texts != 0)
                        component_texts->push_back(string(word));
                    if (component_rects != 0)
                        component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1));
                    if (component_confidences != 0)
                        component_confidences->push_back(conf);

                    delete[] word;
                } while (ri->Next(level));
            }
            delete ri;
        }

        tess.Clear();

#else

        cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl;
        output.clear();
        if(component_rects)
            component_rects->clear();
        if(component_texts)
            component_texts->clear();
        if(component_confidences)
            component_confidences->clear();
#endif
    }