// Extracts features from the given blob and saves them in the tr_file_data_ // member variable. // fontname: Name of font that this blob was printed in. // cn_denorm: Character normalization transformation to apply to the blob. // fx_info: Character normalization parameters computed with cn_denorm. // blob_text: Ground truth text for the blob. void Classify::LearnBlob(const STRING& fontname, TBLOB* blob, const DENORM& cn_denorm, const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text) { CHAR_DESC CharDesc = NewCharDescription(feature_defs_); CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm); CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info); CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info); CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info); if (ValidCharDescription(feature_defs_, CharDesc)) { // Label the features with a class name and font name. tr_file_data_ += "\n"; tr_file_data_ += fontname; tr_file_data_ += " "; tr_file_data_ += blob_text; tr_file_data_ += "\n"; // write micro-features to file and clean up WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_); } else { tprintf("Blob learned was invalid!\n"); } FreeCharDescription(CharDesc); } // LearnBlob
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile, TBLOB* Blob, const DENORM& denorm, const char* BlobText, const char* FontName) { CHAR_DESC CharDesc; ASSERT_HOST(FeatureFile != NULL); CharDesc = ExtractBlobFeatures(FeatureDefs, denorm, Blob); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; } if (ValidCharDescription(FeatureDefs, CharDesc)) { // label the features with a class name and font name fprintf(FeatureFile, "\n%s %s\n", FontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureDefs, FeatureFile, CharDesc); } else { tprintf("Blob learned was invalid!\n"); } FreeCharDescription(CharDesc); } // LearnBlob
/*---------------------------------------------------------------------------*/ void LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[], int TextLength) /* ** Parameters: ** Blob blob whose micro-features are to be learned ** Row row of text that blob came from ** BlobText text that corresponds to blob ** TextLength number of characters in blob ** Globals: ** imagefile base filename of the page being learned ** FontName name of font currently being trained on ** Operation: ** Extract micro-features from the specified blob and append ** them to the appropriate file. ** Return: none ** Exceptions: none ** History: 7/28/89, DSJ, Created. */ #define MAXFILENAME 80 #define MAXCHARNAME 20 #define MAXFONTNAME 20 #define TRAIN_SUFFIX ".tr" { static FILE *FeatureFile = NULL; char Filename[MAXFILENAME]; char CharName[MAXCHARNAME]; CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; // throw out blobs which do not represent only one character if (TextLength != 1) return; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); // if a feature file is not yet open, open it // the name of the file is the name of the image plus TRAIN_SUFFIX if (FeatureFile == NULL) { strcpy(Filename, imagefile); strcat(Filename, TRAIN_SUFFIX); FeatureFile = Efopen (Filename, "w"); cprintf ("TRAINING ... Font name = %s.\n", FontName); } // get the name of the character for this blob chartoname (CharName, BlobText[0], ""); // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", FontName, CharName); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob
void LearnBlob(FILE* FeatureFile, TBLOB* Blob, TEXTROW* Row, const char* BlobText, const char* FontName) { CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; } // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", FontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob
/*---------------------------------------------------------------------------*/ void LearnBlob (const STRING& filename, TBLOB * Blob, TEXTROW * Row, char BlobText[]) /* ** Parameters: ** Blob blob whose micro-features are to be learned ** Row row of text that blob came from ** BlobText text that corresponds to blob ** TextLength number of characters in blob ** Globals: ** imagefile base filename of the page being learned ** classify_font_name ** name of font currently being trained on ** Operation: ** Extract micro-features from the specified blob and append ** them to the appropriate file. ** Return: none ** Exceptions: none ** History: 7/28/89, DSJ, Created. */ #define TRAIN_SUFFIX ".tr" { static FILE *FeatureFile = NULL; STRING Filename(filename); CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; } // If no fontname was set, try to extract it from the filename char CurrFontName[32] = ""; strncpy(CurrFontName, static_cast<STRING>(classify_font_name).string(), 32); /* if (!strcmp(CurrFontName, "UnknownFont")) { // filename is expected to be of the form [lang].[fontname].exp[num] // The [lang], [fontname] and [num] fields should not have '.' characters. const char *basename = strrchr(filename.string(), '/'); const char *firstdot = strchr(basename, '.'); const char *lastdot = strrchr(filename.string(), '.'); if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) { strncpy(CurrFontName, firstdot + 1, lastdot - firstdot - 1); } } //*/ // if a feature file is not yet open, open it // the name of the file is the name of the image plus TRAIN_SUFFIX if (FeatureFile == NULL) { Filename += TRAIN_SUFFIX; FeatureFile = Efopen (Filename.string(), "w"); cprintf ("TRAINING ... Font name = %s\n", CurrFontName); } // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", CurrFontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob