void tess_training_tester( //call tess const STRING& filename, //filename to output PBLOB *blob, //blob to match DENORM *denorm, //de-normaliser BOOL8 correct, //ly segmented char *text, //correct text inT32 count, //chars in text BLOB_CHOICE_LIST *ratings //list of results ) { TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row if (correct) { classify_norm_method.set_value(character); // force char norm spc 30/11/93 tess_bn_matching.set_value(false); //turn it off tess_cn_matching.set_value(false); //convert blob tessblob = make_tess_blob (blob, TRUE); //make dummy row make_tess_row(denorm, &tessrow); //learn it LearnBlob(filename, tessblob, &tessrow, text); free_blob(tessblob); } }
/********************************************************************** * compare_bln_blobs * * Compare 2 baseline normalised blobs and return the rating. **********************************************************************/ float Tesseract::compare_bln_blobs( //match 2 blobs PBLOB *blob1, //first blob DENORM *denorm1, PBLOB *blob2, //other blob DENORM *denorm2) { TBLOB *tblob1; //tessblobs TBLOB *tblob2; TEXTROW tessrow1, tessrow2; //tess rows float rating; //match result tblob1 = make_tess_blob (blob1, TRUE); make_tess_row(denorm1, &tessrow1); tblob2 = make_tess_blob (blob2, TRUE); make_tess_row(denorm2, &tessrow2); rating = compare_tess_blobs (tblob1, &tessrow1, tblob2, &tessrow2); free_blob(tblob1); free_blob(tblob2); return rating; }
void Tesseract::tess_bn_matcher( //call tess PBLOB *pblob, //previous blob PBLOB *blob, //blob to match PBLOB *nblob, //next blob WERD *word, //word it came from DENORM *denorm, //de-normaliser BLOB_CHOICE_LIST *ratings //list of results ) { TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row tess_bn_matching.set_value(true); //turn it on tess_cn_matching.set_value(false); //convert blob tessblob = make_rotated_tess_blob(denorm, blob, true); //make dummy row make_tess_row(denorm, &tessrow); //classify AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, NULL); free_blob(tessblob); }
void tess_training_tester( //call tess PBLOB *blob, //blob to match DENORM *denorm, //de-normaliser BOOL8 correct, //ly segmented char *text, //correct text INT32 count, //chars in text BLOB_CHOICE_LIST *ratings //list of results ) { TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row if (correct) { NormMethod = character; //Force char norm spc 30/11/93 tess_bn_matching = FALSE; //turn it off tess_cn_matching = FALSE; //convert blob tessblob = make_tess_blob (blob, TRUE); //make dummy row make_tess_row(denorm, &tessrow); //learn it LearnBlob(tessblob, &tessrow, text, count); free_blob(tessblob); } }
void tess_default_matcher( //call tess PBLOB *pblob, //previous blob PBLOB *blob, //blob to match PBLOB *nblob, //next blob WERD *word, //word it came from DENORM *denorm, //de-normaliser BLOB_CHOICE_LIST &ratings //list of results ) { LIST result; //tess output TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row tess_bn_matching = FALSE; //turn it off tess_cn_matching = FALSE; //convert blob tessblob = make_tess_blob (blob, TRUE); //make dummy row make_tess_row(denorm, &tessrow); //classify result = AdaptiveClassifier (tessblob, NULL, &tessrow); free_blob(tessblob); //make our format convert_choice_list(result, ratings); }
// Adapt to recognize the current image as the given character. // The image must be preloaded and be just an image of a single character. void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender) { UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length); LINE_STATS LineStats; TEXTROW row; fill_dummy_row(baseline, xheight, descender, ascender, &row); GetLineStatsFromRow(&row, &LineStats); TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender); float threshold; int best_class = 0; float best_rating = -100; // Classify to get a raw choice. LIST result = AdaptiveClassifier(blob, NULL, &row); LIST p; for (p = result; p != NULL; p = p->next) { A_CHOICE *tesschoice = (A_CHOICE *) p->node; if (tesschoice->rating > best_rating) { best_rating = tesschoice->rating; best_class = tesschoice->string[0]; } } FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId); // We have to use char-level adaptation because otherwise // someone should do forced alignment somewhere. void AdaptToChar(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold); if (id == best_class) threshold = GoodAdaptiveMatch; else { /* the blob was incorrectly classified - find the rating threshold needed to create a template which will correct the error with some margin. However, don't waste time trying to make templates which are too tight. */ threshold = GetBestRatingFor(blob, &LineStats, id); threshold *= .9; const float max_threshold = .125; const float min_threshold = .02; if (threshold > max_threshold) threshold = max_threshold; // I have cuddled the following line to set it out of the strike // of the coverage testing tool. I have no idea how to trigger // this situation nor I have any necessity to do it. --mezhirov if (threshold < min_threshold) threshold = min_threshold; } if (blob->outlines) AdaptToChar(blob, &LineStats, id, threshold); free_blob(blob); }