// Runs Tesseract over `input` and returns the recognized text as UTF-8.
//
// The shared `ocr` engine is (re)initialized for `language` on every call,
// using the legacy Tesseract engine (OEM_TESSERACT_ONLY) and treating the
// whole image as a single uniform block of text (PSM_SINGLE_BLOCK).
//
// Parameters:
//   input    - image to recognize. It is handed to Tesseract with 1 byte per
//              pixel and `input.step` bytes per row, so it is presumably an
//              8-bit single-channel image -- NOTE(review): confirm at callers.
//   language - Tesseract language code, forwarded unchanged to Init().
//
// Returns the recognized text, or an empty string when the engine cannot be
// initialized or produces no result.
std::string identifyText(cv::Mat input, std::string language) {
    // Init() reports failure with a non-zero code; recognizing with an
    // uninitialized engine would only yield a null result further down.
    if (ocr.Init(NULL, language.c_str(), tesseract::OEM_TESSERACT_ONLY) != 0) {
        return std::string();
    }

    // A character whitelist can be imposed here if needed, e.g.
    //   ocr.SetVariable("tessedit_char_whitelist", "<allowed characters>");

    ocr.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
    ocr.SetImage(input.data, input.cols, input.rows, 1, input.step);

    // GetUTF8Text() returns a heap buffer owned by the caller (release with
    // delete[]); it may be null when recognition fails.
    char* raw = ocr.GetUTF8Text();
    if (raw == NULL) {
        return std::string();
    }

    std::string text(raw);
    delete[] raw;
    return text;
}
int SubsExtractor::ocr(char *outtext) { Mat crop,crop2; Mat(img, ROI).copyTo(crop); cvtColor(crop, crop2, CV_BGR2GRAY); //adaptiveThreshold(crop2,crop,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,CV_THRESH_BINARY,3,5); int th1 = 200; int th2 = 255; threshold(crop2,crop,th1,th2,THRESH_BINARY); //tesseract::TessBaseAPI tess; tess.SetImage((uchar *)crop.data,crop.cols,crop.rows,1,crop.cols); imshow("control", crop); char *s; if((s = tess.GetUTF8Text()) != NULL) { //fprintf(stderr,"OCR: %s\n",s); strcpy(outtext,s); return strlen(outtext); } return 0; }
// Reads a printed number from `img` with the shared `tess` engine and maps it
// to an index.
//
// The image is first reduced to a mask with inRange(): pixels whose channels
// lie within [ (0, 0, 51), (255, 255, 255) ] are kept, which filters out the
// black pixels. The mask is then recognized, all whitespace is stripped from
// the result and the remainder is parsed with atoi().
//
// Parameters:
//   img - image containing the number; overwritten locally by the mask.
//
// Returns 5..12 for the recognized values 10, 20, ..., 80 respectively, and
// -1 for any other value or when Tesseract produced no text.
int detectNumber(Mat img) {
    inRange(img, Scalar(0, 0, 51), Scalar(255, 255, 255), img);  // filter black
    tess.SetImage((uchar*)img.data, img.size().width, img.size().height, img.channels(), img.step1());

    // GetUTF8Text() returns a caller-owned heap buffer (delete[]) and may
    // return null; building a string from a null pointer is undefined.
    char* raw = tess.GetUTF8Text();
    if (raw == NULL) {
        return -1;
    }
    const string recognized(raw);
    delete[] raw;

    // Strip whitespace. Each byte goes through unsigned char first because
    // isspace() is undefined for negative values, which plain char can hold
    // for non-ASCII UTF-8 bytes.
    string digits;
    for (string::size_type i = 0; i < recognized.size(); ++i) {
        const unsigned char ch = static_cast<unsigned char>(recognized[i]);
        if (!::isspace(ch)) {
            digits += recognized[i];
        }
    }

    switch (atoi(digits.c_str())) {
        case 10: return 5;
        case 20: return 6;
        case 30: return 7;
        case 40: return 8;
        case 50: return 9;
        case 60: return 10;
        case 70: return 11;
        case 80: return 12;
    }
    return -1;
}
void run(Mat& image, string& output, vector<Rect>* component_rects=NULL, vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL, int component_level=0) { CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) ); #ifdef HAVE_TESSERACT if (component_texts != 0) component_texts->clear(); if (component_rects != 0) component_rects->clear(); if (component_confidences != 0) component_confidences->clear(); tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1()); tess.Recognize(0); char *outText; outText = tess.GetUTF8Text(); output = string(outText); delete [] outText; if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) ) { tesseract::ResultIterator* ri = tess.GetIterator(); tesseract::PageIteratorLevel level = tesseract::RIL_WORD; if (component_level == OCR_LEVEL_TEXTLINE) level = tesseract::RIL_TEXTLINE; if (ri != 0) { do { const char* word = ri->GetUTF8Text(level); if (word == NULL) continue; float conf = ri->Confidence(level); int x1, y1, x2, y2; ri->BoundingBox(level, &x1, &y1, &x2, &y2); if (component_texts != 0) component_texts->push_back(string(word)); if (component_rects != 0) component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1)); if (component_confidences != 0) component_confidences->push_back(conf); delete[] word; } while (ri->Next(level)); } delete ri; } tess.Clear(); #else cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl; output.clear(); if(component_rects) component_rects->clear(); if(component_texts) component_texts->clear(); if(component_confidences) component_confidences->clear(); #endif }