/*---------------------------------------------------------------------------*/ void LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[], int TextLength) /* ** Parameters: ** Blob blob whose micro-features are to be learned ** Row row of text that blob came from ** BlobText text that corresponds to blob ** TextLength number of characters in blob ** Globals: ** imagefile base filename of the page being learned ** FontName name of font currently being trained on ** Operation: ** Extract micro-features from the specified blob and append ** them to the appropriate file. ** Return: none ** Exceptions: none ** History: 7/28/89, DSJ, Created. */ #define MAXFILENAME 80 #define MAXCHARNAME 20 #define MAXFONTNAME 20 #define TRAIN_SUFFIX ".tr" { static FILE *FeatureFile = NULL; char Filename[MAXFILENAME]; char CharName[MAXCHARNAME]; CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; // throw out blobs which do not represent only one character if (TextLength != 1) return; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); // if a feature file is not yet open, open it // the name of the file is the name of the image plus TRAIN_SUFFIX if (FeatureFile == NULL) { strcpy(Filename, imagefile); strcat(Filename, TRAIN_SUFFIX); FeatureFile = Efopen (Filename, "w"); cprintf ("TRAINING ... Font name = %s.\n", FontName); } // get the name of the character for this blob chartoname (CharName, BlobText[0], ""); // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", FontName, CharName); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob
void LearnBlob(FILE* FeatureFile, TBLOB* Blob, TEXTROW* Row, const char* BlobText, const char* FontName) { CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; } // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", FontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob
// Adapt to recognize the current image as the given character. // The image must be preloaded and be just an image of a single character. void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender) { UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length); LINE_STATS LineStats; TEXTROW row; fill_dummy_row(baseline, xheight, descender, ascender, &row); GetLineStatsFromRow(&row, &LineStats); TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender); float threshold; int best_class = 0; float best_rating = -100; // Classify to get a raw choice. LIST result = AdaptiveClassifier(blob, NULL, &row); LIST p; for (p = result; p != NULL; p = p->next) { A_CHOICE *tesschoice = (A_CHOICE *) p->node; if (tesschoice->rating > best_rating) { best_rating = tesschoice->rating; best_class = tesschoice->string[0]; } } FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId); // We have to use char-level adaptation because otherwise // someone should do forced alignment somewhere. void AdaptToChar(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold); if (id == best_class) threshold = GoodAdaptiveMatch; else { /* the blob was incorrectly classified - find the rating threshold needed to create a template which will correct the error with some margin. However, don't waste time trying to make templates which are too tight. */ threshold = GetBestRatingFor(blob, &LineStats, id); threshold *= .9; const float max_threshold = .125; const float min_threshold = .02; if (threshold > max_threshold) threshold = max_threshold; // I have cuddled the following line to set it out of the strike // of the coverage testing tool. I have no idea how to trigger // this situation nor I have any necessity to do it. --mezhirov if (threshold < min_threshold) threshold = min_threshold; } if (blob->outlines) AdaptToChar(blob, &LineStats, id, threshold); free_blob(blob); }
/*---------------------------------------------------------------------------*/ void LearnBlob (const STRING& filename, TBLOB * Blob, TEXTROW * Row, char BlobText[]) /* ** Parameters: ** Blob blob whose micro-features are to be learned ** Row row of text that blob came from ** BlobText text that corresponds to blob ** TextLength number of characters in blob ** Globals: ** imagefile base filename of the page being learned ** classify_font_name ** name of font currently being trained on ** Operation: ** Extract micro-features from the specified blob and append ** them to the appropriate file. ** Return: none ** Exceptions: none ** History: 7/28/89, DSJ, Created. */ #define TRAIN_SUFFIX ".tr" { static FILE *FeatureFile = NULL; STRING Filename(filename); CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; } // If no fontname was set, try to extract it from the filename char CurrFontName[32] = ""; strncpy(CurrFontName, static_cast<STRING>(classify_font_name).string(), 32); /* if (!strcmp(CurrFontName, "UnknownFont")) { // filename is expected to be of the form [lang].[fontname].exp[num] // The [lang], [fontname] and [num] fields should not have '.' characters. const char *basename = strrchr(filename.string(), '/'); const char *firstdot = strchr(basename, '.'); const char *lastdot = strrchr(filename.string(), '.'); if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) { strncpy(CurrFontName, firstdot + 1, lastdot - firstdot - 1); } } //*/ // if a feature file is not yet open, open it // the name of the file is the name of the image plus TRAIN_SUFFIX if (FeatureFile == NULL) { Filename += TRAIN_SUFFIX; FeatureFile = Efopen (Filename.string(), "w"); cprintf ("TRAINING ... Font name = %s\n", CurrFontName); } // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", CurrFontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob