pair<int,string> TesseractBridge::processEx(const Mat& tmp, Rect& r) { api.Clear(); char* cstr = api.TesseractRect(tmp.data, tmp.channels(), tmp.cols*tmp.channels(), 0, 0, tmp.cols, tmp.rows); // cout << cstr << endl; delete[] cstr; tesseract::ResultIterator* ri = api.GetIterator(); tesseract::PageIteratorLevel level = tesseract::RIL_WORD; if (ri != 0) { const char* word = ri->GetUTF8Text(level); if(!word) return make_pair(0, ""); float conf = ri->Confidence(level); int x1, y1, x2, y2; ri->BoundingBox(level, &x1, &y1, &x2, &y2); // printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", // word, conf, x1, y1, x2, y2); string theword(word); delete[] word; bool allnonalphanom = true; for (int i=0; i<theword.size(); i++) { allnonalphanom = allnonalphanom && !(isalnum(theword[i]) || theword[i] == '"' || theword[i] == '.' || theword[i] == ',' || theword[i] == '?' || theword[i] == '!' || theword[i] == '\''); } if (allnonalphanom) return make_pair(0, ""); r.x += x1; r.y += y1; r.width = x2-x1; r.height = y2-y1; return make_pair((int)conf, theword); } else return make_pair(0, ""); }
void run(Mat& image, string& output, vector<Rect>* component_rects=NULL, vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL, int component_level=0) { CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) ); #ifdef HAVE_TESSERACT if (component_texts != 0) component_texts->clear(); if (component_rects != 0) component_rects->clear(); if (component_confidences != 0) component_confidences->clear(); tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1()); tess.Recognize(0); char *outText; outText = tess.GetUTF8Text(); output = string(outText); delete [] outText; if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) ) { tesseract::ResultIterator* ri = tess.GetIterator(); tesseract::PageIteratorLevel level = tesseract::RIL_WORD; if (component_level == OCR_LEVEL_TEXTLINE) level = tesseract::RIL_TEXTLINE; if (ri != 0) { do { const char* word = ri->GetUTF8Text(level); if (word == NULL) continue; float conf = ri->Confidence(level); int x1, y1, x2, y2; ri->BoundingBox(level, &x1, &y1, &x2, &y2); if (component_texts != 0) component_texts->push_back(string(word)); if (component_rects != 0) component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1)); if (component_confidences != 0) component_confidences->push_back(conf); delete[] word; } while (ri->Next(level)); } delete ri; } tess.Clear(); #else cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl; output.clear(); if(component_rects) component_rects->clear(); if(component_texts) component_texts->clear(); if(component_confidences) component_confidences->clear(); #endif }