void tess_training_tester(                           //call tess
                          const STRING& filename,    //filename to output
                          PBLOB *blob,               //blob to match
                          DENORM *denorm,            //de-normaliser
                          BOOL8 correct,             //ly segmented
                          char *text,                //correct text
                          inT32 count,               //chars in text
                          BLOB_CHOICE_LIST *ratings  //list of results
                         ) {
  TBLOB *tessblob;               //converted blob
  TEXTROW tessrow;               //dummy row

  if (correct) {
    classify_norm_method.set_value(character); // force char norm spc 30/11/93
    tess_bn_matching.set_value(false);    //turn it off
    tess_cn_matching.set_value(false);
                                 //convert blob
    tessblob = make_tess_blob (blob, TRUE);
                                 //make dummy row
    make_tess_row(denorm, &tessrow);
                                 //learn it
    LearnBlob(filename, tessblob, &tessrow, text);
    free_blob(tessblob);
  }
}
Ejemplo n.º 2
0
/**********************************************************************
 * compare_bln_blobs
 *
 * Convert two baseline normalised blobs to tess blobs, build a dummy
 * row for each from its de-normaliser, and return the rating produced
 * by compare_tess_blobs. The converted blobs are freed before return.
 **********************************************************************/
float Tesseract::compare_bln_blobs(PBLOB *blob1,      // first blob
                                   DENORM *denorm1,   // de-normaliser of blob1
                                   PBLOB *blob2,      // other blob
                                   DENORM *denorm2) { // de-normaliser of blob2
  TEXTROW first_row;             // dummy row for blob1
  TEXTROW second_row;            // dummy row for blob2

  TBLOB *first_tess = make_tess_blob(blob1, TRUE);
  make_tess_row(denorm1, &first_row);

  TBLOB *second_tess = make_tess_blob(blob2, TRUE);
  make_tess_row(denorm2, &second_row);

  // The rating has to be captured before the converted blobs are released.
  float match_rating =
      compare_tess_blobs(first_tess, &first_row, second_tess, &second_row);

  free_blob(first_tess);
  free_blob(second_tess);
  return match_rating;
}
// Classifies one blob with the tess_bn_matching flag switched on and the
// tess_cn_matching flag switched off. The pblob, nblob and word arguments
// are accepted but not used by this function.
void Tesseract::tess_bn_matcher(PBLOB *pblob,               // previous blob
                                PBLOB *blob,                // blob to match
                                PBLOB *nblob,               // next blob
                                WERD *word,                 // word it came from
                                DENORM *denorm,             // de-normaliser
                                BLOB_CHOICE_LIST *ratings   // list of results
                               ) {
  // Select the matcher mode before anything is converted.
  tess_bn_matching.set_value(true);
  tess_cn_matching.set_value(false);

  // Convert the blob, then build the dummy row from the de-normaliser.
  TBLOB *rotated = make_rotated_tess_blob(denorm, blob, true);
  TEXTROW dummy_row;
  make_tess_row(denorm, &dummy_row);

  // Classify into ratings and release the converted copy.
  AdaptiveClassifier(rotated, NULL, &dummy_row, ratings, NULL);
  free_blob(rotated);
}
Ejemplo n.º 4
0
// Hands a single blob to the learner as a training sample. Nothing is done
// unless the blob is flagged as correctly segmented. The ratings argument
// is accepted but not used by this function.
void tess_training_tester(PBLOB *blob,                // blob to learn from
                          DENORM *denorm,             // de-normaliser
                          BOOL8 correct,              // correctly segmented?
                          char *text,                 // correct text
                          INT32 count,                // chars in text
                          BLOB_CHOICE_LIST *ratings   // list of results
                         ) {
  // Badly segmented blobs are not used for training.
  if (!correct)
    return;

  // Force character normalisation and switch both matcher flags off.
  NormMethod = character;
  tess_bn_matching = FALSE;
  tess_cn_matching = FALSE;

  // Convert the blob, then build the dummy row from the de-normaliser.
  TBLOB *converted = make_tess_blob(blob, TRUE);
  TEXTROW dummy_row;
  make_tess_row(denorm, &dummy_row);

  // Learn the blob and release the converted copy.
  LearnBlob(converted, &dummy_row, text, count);
  free_blob(converted);
}
Ejemplo n.º 5
0
// Classifies one blob with both matcher flags switched off and converts the
// classifier output into ratings. The pblob, nblob and word arguments are
// accepted but not used by this function.
void tess_default_matcher(PBLOB *pblob,               // previous blob
                          PBLOB *blob,                // blob to match
                          PBLOB *nblob,               // next blob
                          WERD *word,                 // word it came from
                          DENORM *denorm,             // de-normaliser
                          BLOB_CHOICE_LIST &ratings   // list of results
                         ) {
  // Both matcher flags are off for the default matcher.
  tess_bn_matching = FALSE;
  tess_cn_matching = FALSE;

  // Convert the blob, then build the dummy row from the de-normaliser.
  TBLOB *converted = make_tess_blob(blob, TRUE);
  TEXTROW dummy_row;
  make_tess_row(denorm, &dummy_row);

  // Classify; the converted blob is released once the classifier is done.
  LIST classifier_output = AdaptiveClassifier(converted, NULL, &dummy_row);
  free_blob(converted);

  // Translate the classifier output into the caller's list format.
  convert_choice_list(classifier_output, ratings);
}
Ejemplo n.º 6
0
// Adapt to recognize the current image as the given character.
// The image must be preloaded and be just an image of a single character.
void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
                                   int length,
                                   float baseline,
                                   float xheight,
                                   float descender,
                                   float ascender) {
  UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length);
  LINE_STATS LineStats;
  TEXTROW row;
  fill_dummy_row(baseline, xheight, descender, ascender, &row);
  GetLineStatsFromRow(&row, &LineStats);

  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender);
  float threshold;
  int best_class = 0;
  float best_rating = -100;


  // Classify to get a raw choice.
  LIST result = AdaptiveClassifier(blob, NULL, &row);
  LIST p;
  for (p = result; p != NULL; p = p->next) {
    A_CHOICE *tesschoice = (A_CHOICE *) p->node;
    if (tesschoice->rating > best_rating) {
      best_rating = tesschoice->rating;
      best_class = tesschoice->string[0];
    }
  }

  FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);

  // We have to use char-level adaptation because otherwise
  // someone should do forced alignment somewhere.
  void AdaptToChar(TBLOB *Blob,
                   LINE_STATS *LineStats,
                   CLASS_ID ClassId,
                   FLOAT32 Threshold);


  if (id == best_class)
    threshold = GoodAdaptiveMatch;
  else {
    /* the blob was incorrectly classified - find the rating threshold
       needed to create a template which will correct the error with
       some margin.  However, don't waste time trying to make
       templates which are too tight. */
    threshold = GetBestRatingFor(blob, &LineStats, id);
    threshold *= .9;
    const float max_threshold = .125;
    const float min_threshold = .02;

    if (threshold > max_threshold)
        threshold = max_threshold;

    // I have cuddled the following line to set it out of the strike
    // of the coverage testing tool. I have no idea how to trigger
    // this situation nor I have any necessity to do it. --mezhirov
    if (threshold < min_threshold) threshold = min_threshold;
  }

  if (blob->outlines)
    AdaptToChar(blob, &LineStats, id, threshold);
  free_blob(blob);
}