Esempio n. 1
0
/*---------------------------------------------------------------------------*/
void
LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[], int TextLength)
/*
 **      Parameters:
 **              Blob            blob whose micro-features are to be learned
 **              Row             row of text that blob came from
 **              BlobText        text that corresponds to blob; only
 **                              BlobText[0] is used
 **              TextLength      number of characters in blob; the blob is
 **                              ignored unless this is exactly 1
 **      Globals:
 **              imagefile       base filename of the page being learned
 **              FontName        name of font currently being trained on
 **      Operation:
 **              Extract micro-features from the specified blob and append
 **              them to the training file.  The file is opened by the first
 **              call that gets that far, is named imagefile + TRAIN_SUFFIX,
 **              and its handle is kept in a function-local static that is
 **              reused by every later call.  If that name does not fit in
 **              MAXFILENAME bytes, or if feature extraction yields NULL, a
 **              message is printed and nothing is written.
 **      Return: none
 **      Exceptions: none
 **      History: 7/28/89, DSJ, Created.
 */
#define MAXFILENAME     80
#define MAXCHARNAME     20
#define MAXFONTNAME     20
#define TRAIN_SUFFIX    ".tr"
{
  static FILE *FeatureFile = NULL;
  char Filename[MAXFILENAME];
  char CharName[MAXCHARNAME];
  CHAR_DESC CharDesc;
  LINE_STATS LineStats;

  EnterLearnMode;

  // throw out blobs which do not represent only one character
  if (TextLength != 1)
    return;

  GetLineStatsFromRow(Row, &LineStats);

  CharDesc = ExtractBlobFeatures (Blob, &LineStats);
  // same guard as the other LearnBlob variants: nothing to write (or free)
  // when no description was produced
  if (CharDesc == NULL) {
    cprintf ("LearnBlob: CharDesc was NULL. Aborting.\n");
    return;
  }

  // if a feature file is not yet open, open it
  // the name of the file is the name of the image plus TRAIN_SUFFIX
  if (FeatureFile == NULL) {
    // strcpy/strcat do no bounds checking, so refuse names that would not
    // fit in Filename (including the terminating '\0') instead of
    // overflowing the stack buffer
    if (strlen (imagefile) + strlen (TRAIN_SUFFIX) >= sizeof (Filename)) {
      cprintf ("LearnBlob: training file name is too long (max %d chars)."
               " Aborting.\n", MAXFILENAME - 1);
      FreeCharDescription(CharDesc);
      return;
    }
    strcpy(Filename, imagefile);
    strcat(Filename, TRAIN_SUFFIX);
    // NOTE(review): FeatureFile is used below without a NULL check;
    // confirm that Efopen never returns NULL on failure.
    FeatureFile = Efopen (Filename, "w");

    cprintf ("TRAINING ... Font name = %s.\n", FontName);
  }

  // get the name of the character for this blob
  chartoname (CharName, BlobText[0], "");

  // label the features with a class name and font name
  fprintf (FeatureFile, "\n%s %s ", FontName, CharName);

  // write micro-features to file and clean up
  WriteCharDescription(FeatureFile, CharDesc);
  FreeCharDescription(CharDesc);

}                                // LearnBlob
void LearnBlob(FILE* FeatureFile, TBLOB* Blob, TEXTROW* Row,
               const char* BlobText, const char* FontName) {
  CHAR_DESC CharDesc;
  LINE_STATS LineStats;

  EnterLearnMode;

  GetLineStatsFromRow(Row, &LineStats);

  CharDesc = ExtractBlobFeatures (Blob, &LineStats);
  if (CharDesc == NULL) {
    cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
    return;
  }

  // label the features with a class name and font name
  fprintf (FeatureFile, "\n%s %s ", FontName, BlobText);

  // write micro-features to file and clean up
  WriteCharDescription(FeatureFile, CharDesc);
  FreeCharDescription(CharDesc);

}                                // LearnBlob
Esempio n. 3
0
// Adapt to recognize the current image as the given character.
// The image must be preloaded and be just an image of a single character.
void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
                                   int length,
                                   float baseline,
                                   float xheight,
                                   float descender,
                                   float ascender) {
  UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length);
  LINE_STATS LineStats;
  TEXTROW row;
  fill_dummy_row(baseline, xheight, descender, ascender, &row);
  GetLineStatsFromRow(&row, &LineStats);

  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender);
  float threshold;
  int best_class = 0;
  float best_rating = -100;


  // Classify to get a raw choice.
  LIST result = AdaptiveClassifier(blob, NULL, &row);
  LIST p;
  for (p = result; p != NULL; p = p->next) {
    A_CHOICE *tesschoice = (A_CHOICE *) p->node;
    if (tesschoice->rating > best_rating) {
      best_rating = tesschoice->rating;
      best_class = tesschoice->string[0];
    }
  }

  FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);

  // We have to use char-level adaptation because otherwise
  // someone should do forced alignment somewhere.
  void AdaptToChar(TBLOB *Blob,
                   LINE_STATS *LineStats,
                   CLASS_ID ClassId,
                   FLOAT32 Threshold);


  if (id == best_class)
    threshold = GoodAdaptiveMatch;
  else {
    /* the blob was incorrectly classified - find the rating threshold
       needed to create a template which will correct the error with
       some margin.  However, don't waste time trying to make
       templates which are too tight. */
    threshold = GetBestRatingFor(blob, &LineStats, id);
    threshold *= .9;
    const float max_threshold = .125;
    const float min_threshold = .02;

    if (threshold > max_threshold)
        threshold = max_threshold;

    // I have cuddled the following line to set it out of the strike
    // of the coverage testing tool. I have no idea how to trigger
    // this situation nor I have any necessity to do it. --mezhirov
    if (threshold < min_threshold) threshold = min_threshold;
  }

  if (blob->outlines)
    AdaptToChar(blob, &LineStats, id, threshold);
  free_blob(blob);
}
Esempio n. 4
0
/*---------------------------------------------------------------------------*/
void
LearnBlob (const STRING& filename,
           TBLOB * Blob, TEXTROW * Row, char BlobText[])
/*
 **      Parameters:
 **              filename        base name of the training file; TRAIN_SUFFIX
 **                              is appended to it.  Only consulted by the
 **                              call that opens the file (see Operation).
 **              Blob            blob whose micro-features are to be learned
 **              Row             row of text that blob came from
 **              BlobText        text that corresponds to blob
 **      Globals:
 **              classify_font_name
 **                              name of font currently being trained on
 **      Operation:
 **              Extract micro-features from the specified blob and append
 **              them to the training file.  The file is opened by the first
 **              call that gets that far and its handle is kept in a
 **              function-local static that is reused by every later call.
 **              The font name written to the file is truncated to 31
 **              characters.  If feature extraction yields NULL, a message
 **              is printed and nothing is written.
 **      Return: none
 **      Exceptions: none
 **      History: 7/28/89, DSJ, Created.
 */
#define TRAIN_SUFFIX    ".tr"
{
  static FILE *FeatureFile = NULL;
  STRING Filename(filename);
  CHAR_DESC CharDesc;
  LINE_STATS LineStats;

  EnterLearnMode;

  GetLineStatsFromRow(Row, &LineStats);

  CharDesc = ExtractBlobFeatures (Blob, &LineStats);
  if (CharDesc == NULL) {
    cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
    return;
  }

  // Take a bounded local copy of the current font name.  strncpy does not
  // write a terminator when the source fills the whole count, so copy at
  // most size - 1 bytes and terminate explicitly; otherwise the "%s"
  // conversions below would read past the end of the buffer.
  char CurrFontName[32] = "";
  strncpy(CurrFontName, static_cast<STRING>(classify_font_name).string(),
          sizeof(CurrFontName) - 1);
  CurrFontName[sizeof(CurrFontName) - 1] = '\0';
  // Disabled: derive the font name from the filename when none was set.
/*
  if (!strcmp(CurrFontName, "UnknownFont")) {
    // filename is expected to be of the form [lang].[fontname].exp[num]
    // The [lang], [fontname] and [num] fields should not have '.' characters.
    const char *basename = strrchr(filename.string(), '/');
    const char *firstdot  = strchr(basename, '.');
    const char *lastdot  = strrchr(filename.string(), '.');
    if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) {
      strncpy(CurrFontName, firstdot + 1, lastdot - firstdot - 1);
    }
  }
//*/
  // if a feature file is not yet open, open it
  // the name of the file is the given filename plus TRAIN_SUFFIX
  if (FeatureFile == NULL) {
    Filename += TRAIN_SUFFIX;
    // NOTE(review): FeatureFile is used below without a NULL check;
    // confirm that Efopen never returns NULL on failure.
    FeatureFile = Efopen (Filename.string(), "w");
    cprintf ("TRAINING ... Font name = %s\n", CurrFontName);
  }

  // label the features with a class name and font name
  fprintf (FeatureFile, "\n%s %s ", CurrFontName, BlobText);

  // write micro-features to file and clean up
  WriteCharDescription(FeatureFile, CharDesc);
  FreeCharDescription(CharDesc);

}                                // LearnBlob