void tess_training_tester(                           //call tess
                          const STRING& filename,    //filename to output
                          PBLOB *blob,               //blob to match
                          DENORM *denorm,            //de-normaliser
                          BOOL8 correct,             //ly segmented
                          char *text,                //correct text
                          inT32 count,               //chars in text
                          BLOB_CHOICE_LIST *ratings  //list of results
                         ) {
  TBLOB *tessblob;               //converted blob
  TEXTROW tessrow;               //dummy row

  if (correct) {
    classify_norm_method.set_value(character); // force char norm spc 30/11/93
    tess_bn_matching.set_value(false);    //turn it off
    tess_cn_matching.set_value(false);
                                 //convert blob
    tessblob = make_tess_blob (blob, TRUE);
                                 //make dummy row
    make_tess_row(denorm, &tessrow);
                                 //learn it
    LearnBlob(filename, tessblob, &tessrow, text);
    free_blob(tessblob);
  }
}
Пример #2
0
/**********************************************************************
 * compare_bln_blobs
 *
 * Compare 2 baseline normalised blobs and return the rating.
 *
 * Each PBLOB is converted to a temporary Tess blob and paired with a row
 * built from its DENORM; the pair of (blob, row) values is passed to
 * compare_tess_blobs. Both temporary blobs are freed before returning.
 **********************************************************************/
float Tesseract::compare_bln_blobs(PBLOB *blob1,      // first blob
                                   DENORM *denorm1,   // its de-normaliser
                                   PBLOB *blob2,      // other blob
                                   DENORM *denorm2) { // its de-normaliser
  TEXTROW first_row;
  TEXTROW second_row;

  // Build the Tess-side representation of each input.
  TBLOB *first = make_tess_blob(blob1, TRUE);
  make_tess_row(denorm1, &first_row);
  TBLOB *second = make_tess_blob(blob2, TRUE);
  make_tess_row(denorm2, &second_row);

  float score = compare_tess_blobs(first, &first_row, second, &second_row);

  // Release the temporaries created above.
  free_blob(first);
  free_blob(second);

  return score;
}
Пример #3
0
/**********************************************************************
 * make_rotated_tess_blob
 *
 * Make a single Tess style blob, applying the given rotation and
 * renormalizing.
 *
 * If denorm is NULL, has no block, or the block's classify_rotation has a
 * zero y component, the blob is converted as-is. Otherwise a deep copy of
 * the blob is centred on the origin, rotated, optionally rescaled (see
 * below), converted, and the copy is deleted. The input blob itself is
 * never modified.
 **********************************************************************/
TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
                              BOOL8 flatten) {
  // Nothing to rotate: plain conversion.
  if (denorm == NULL || denorm->block() == NULL ||
      denorm->block()->classify_rotation().y() == 0.0) {
    return make_tess_blob(blob, flatten);
  }

  // Bounding box of the blob BEFORE rotation.
  TBOX box = blob->bounding_box();
  int raw_width = box.width();
  int raw_height = box.height();
  // Source-image size: undo the denorm scale, rounding to nearest.
  int src_width = static_cast<int>(raw_width / denorm->scale() + 0.5);
  int src_height = static_cast<int>(raw_height / denorm->scale() + 0.5);
  int x_middle = (box.left() + box.right()) / 2;
  int y_middle = (box.top() + box.bottom()) / 2;

  // Work on a copy: centre it on the origin, then rotate.
  PBLOB* working_copy = PBLOB::deep_copy(blob);
  working_copy->move(FCOORD(-x_middle, -y_middle));
  working_copy->rotate(denorm->block()->classify_rotation());

  ICOORD median_size = denorm->block()->median_size();
  int tolerance = median_size.x() / 8;
  // TODO(dsl/rays) find a better normalization solution. In the mean time
  // make it work for CJK by normalizing for Cap height in the same way
  // as is applied in compute_block_xheight when the row is presumed to
  // be ALLCAPS, i.e. the x-height is the fixed fraction
  // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
  bool is_median_sized =
      NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
      NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance);
  if (is_median_sized) {
    float target_height = bln_x_height * (textord_merge_x + textord_merge_asc)
                        / textord_merge_x;
    // The divisor is the width of the pre-rotation box.
    working_copy->scale(target_height / box.width());
    // Shift vertically so the bottom of the blob sits at bln_baseline_offset.
    working_copy->move(FCOORD(0.0f,
                              bln_baseline_offset -
                                working_copy->bounding_box().bottom()));
  }

  TBLOB* converted = make_tess_blob(working_copy, flatten);
  delete working_copy;
  return converted;
}
Пример #4
0
/// Return a TBLOB * from the whole page_image.
/// To be freed later with free_blob().
///
/// Steps: extract edges from page_image into a temporary BLOCK, merge every
/// resulting C_BLOB into one PBLOB, baseline-normalise that PBLOB through a
/// temporary WERD/ROW, and convert it with make_tess_blob().
///
/// @param baseline   passed to make_tess_ocrrow(); baseline + xheight is also
///                   passed to the PBLOB(C_BLOB*, float) constructor
/// @param xheight    passed to make_tess_ocrrow() (see baseline)
/// @param descender  passed to make_tess_ocrrow()
/// @param ascender   passed to make_tess_ocrrow()
/// @return the converted blob; the caller releases it with free_blob()
TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender) {
  // Temporary container for the extracted C_BLOBs; released before returning.
  BLOCK *block = new BLOCK ("a character",
                            TRUE,
                            0, 0,
                            0, 0,
                            page_image.get_xsize(),
                            page_image.get_ysize());

  // Create C_BLOBs from the page
  extract_edges(
#ifndef GRAPHICS_DISABLED
                NULL,
#endif
                &page_image, &page_image,
                ICOORD(page_image.get_xsize(), page_image.get_ysize()),
                block);

  // Create one PBLOB from all C_BLOBs
  C_BLOB_LIST *list = block->blob_list();
  C_BLOB_IT c_blob_it(list);
  PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list
  for (c_blob_it.mark_cycle_pt();
       !c_blob_it.cycled_list();
       c_blob_it.forward()) {
      C_BLOB *c_blob = c_blob_it.data();
      PBLOB c_as_p(c_blob, baseline + xheight);
      merge_blobs(pblob, &c_as_p);
  }
  // NOTE(review): the original comment says the word deletes this list;
  // confirm against the WERD(PBLOB_LIST*, ...) constructor.
  PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word
  PBLOB_IT pblob_it(pblob_list);
  pblob_it.add_after_then_move(pblob);

  // Normalize PBLOB
  WERD word(pblob_list, 0, " ");
  ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender);
  word.baseline_normalise(row);
  delete row;

  // Create a TBLOB from PBLOB. The conversion must happen while `word` is
  // still alive, so the result is captured before any cleanup.
  TBLOB *result = make_tess_blob(pblob, /* flatten: */ TRUE);

  // The block was never released before, leaking it (and the C_BLOBs it
  // holds) on every call. Nothing below this point touches it.
  // NOTE(review): assumes PBLOB(C_BLOB*, float) keeps no reference into the
  // C_BLOB it was built from -- confirm.
  delete block;

  return result;
}
Пример #5
0
// Convert every PBLOB in bloblist to a TBLOB (via make_tess_blob with
// flatten = TRUE) and chain the results through their `next` pointers in
// list order. Returns the first TBLOB, or NULL when the list is empty.
// NOTE(review): the last blob's `next` is not set here; presumably
// make_tess_blob leaves it NULL -- confirm.
TBLOB *make_tess_blobs(PBLOB_LIST *bloblist) {  // list to convert
  TBLOB *first = NULL;   // head of the output chain
  TBLOB *last = NULL;    // most recently appended blob
  PBLOB_IT blob_it = bloblist;

  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    TBLOB *converted = make_tess_blob(blob_it.data(), TRUE);
    if (first == NULL) {
      first = converted;        // first element starts the chain
    } else {
      last->next = converted;   // append behind the current tail
    }
    last = converted;
    blob_it.forward();
  }
  return first;
}
Пример #6
0
// Training hook: when the blob is flagged as correctly segmented, convert it
// to a Tess-style blob, build a dummy row from the de-normaliser and hand
// both to LearnBlob together with the correct text and its length.
// Does nothing otherwise.
//
//   blob    - blob to convert and learn
//   denorm  - de-normaliser used to build the dummy row
//   correct - non-zero when the blob is correctly segmented
//   text    - correct text, forwarded to LearnBlob
//   count   - chars in text, forwarded to LearnBlob
//   ratings - list of results (not used by this function)
void tess_training_tester(PBLOB *blob,
                          DENORM *denorm,
                          BOOL8 correct,
                          char *text,
                          INT32 count,
                          BLOB_CHOICE_LIST *ratings) {
  TEXTROW dummy_row;  // row handed to LearnBlob alongside the blob

  if (!correct)
    return;  // only correctly segmented blobs are learned

  // Force character normalisation and turn both matchers off.
  NormMethod = character;
  tess_bn_matching = FALSE;
  tess_cn_matching = FALSE;

  // Convert the blob, make the dummy row, then learn it.
  TBLOB *converted = make_tess_blob(blob, TRUE);
  make_tess_row(denorm, &dummy_row);
  LearnBlob(converted, &dummy_row, text, count);

  // The converted blob is only needed for the duration of the call.
  free_blob(converted);
}
Пример #7
0
// Classify one blob with AdaptiveClassifier and write the choices into
// `ratings` via convert_choice_list.
//
//   pblob   - previous blob (not used by this function)
//   blob    - blob to match
//   nblob   - next blob (not used by this function)
//   word    - word it came from (not used by this function)
//   denorm  - de-normaliser used to build the dummy row
//   ratings - receives the converted list of results
void tess_default_matcher(PBLOB *pblob,
                          PBLOB *blob,
                          PBLOB *nblob,
                          WERD *word,
                          DENORM *denorm,
                          BLOB_CHOICE_LIST &ratings) {
  LIST choices;        // raw classifier output
  TEXTROW dummy_row;   // row handed to the classifier alongside the blob

  // Both matchers are turned off before classifying.
  tess_bn_matching = FALSE;
  tess_cn_matching = FALSE;

  // Convert the blob and build its dummy row.
  TBLOB *converted = make_tess_blob(blob, TRUE);
  make_tess_row(denorm, &dummy_row);

  // Classify, then drop the temporary blob before converting the output.
  choices = AdaptiveClassifier(converted, NULL, &dummy_row);
  free_blob(converted);

  // Translate the classifier output into the caller's list format.
  convert_choice_list(choices, ratings);
}