void tess_training_tester( //call tess const STRING& filename, //filename to output PBLOB *blob, //blob to match DENORM *denorm, //de-normaliser BOOL8 correct, //ly segmented char *text, //correct text inT32 count, //chars in text BLOB_CHOICE_LIST *ratings //list of results ) { TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row if (correct) { classify_norm_method.set_value(character); // force char norm spc 30/11/93 tess_bn_matching.set_value(false); //turn it off tess_cn_matching.set_value(false); //convert blob tessblob = make_tess_blob (blob, TRUE); //make dummy row make_tess_row(denorm, &tessrow); //learn it LearnBlob(filename, tessblob, &tessrow, text); free_blob(tessblob); } }
/********************************************************************** * compare_bln_blobs * * Compare 2 baseline normalised blobs and return the rating. **********************************************************************/ float Tesseract::compare_bln_blobs( //match 2 blobs PBLOB *blob1, //first blob DENORM *denorm1, PBLOB *blob2, //other blob DENORM *denorm2) { TBLOB *tblob1; //tessblobs TBLOB *tblob2; TEXTROW tessrow1, tessrow2; //tess rows float rating; //match result tblob1 = make_tess_blob (blob1, TRUE); make_tess_row(denorm1, &tessrow1); tblob2 = make_tess_blob (blob2, TRUE); make_tess_row(denorm2, &tessrow2); rating = compare_tess_blobs (tblob1, &tessrow1, tblob2, &tessrow2); free_blob(tblob1); free_blob(tblob2); return rating; }
/**********************************************************************
 * make_rotated_tess_blob
 *
 * Make a single Tess style blob, applying the given rotation and
 * renormalizing.
 *
 * The rotation is taken from denorm->block()->classify_rotation(). When
 * denorm is NULL, has no block, or the rotation's y component is zero,
 * the blob is converted directly with make_tess_blob and is not copied.
 * Otherwise a deep copy of the blob is rotated and converted, and the
 * input blob is left untouched.
 **********************************************************************/
TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
                              BOOL8 flatten) {
  // No usable rotation: plain conversion.
  if (denorm == NULL || denorm->block() == NULL ||
      denorm->block()->classify_rotation().y() == 0.0) {
    return make_tess_blob(blob, flatten);
  }

  // Size of the blob before rotation, divided by the denorm scale and
  // rounded to the nearest int.
  TBOX box = blob->bounding_box();
  int src_width = static_cast<int>(box.width() / denorm->scale() + 0.5);
  int src_height = static_cast<int>(box.height() / denorm->scale() + 0.5);

  // Centre of the bounding box (integer arithmetic).
  int x_middle = (box.left() + box.right()) / 2;
  int y_middle = (box.top() + box.bottom()) / 2;

  // Work on a copy: move the box centre to the origin, then rotate.
  PBLOB* rotated_blob = PBLOB::deep_copy(blob);
  rotated_blob->move(FCOORD(-x_middle, -y_middle));
  rotated_blob->rotate(denorm->block()->classify_rotation());

  ICOORD median_size = denorm->block()->median_size();
  int tolerance = median_size.x() / 8;
  // TODO(dsl/rays) find a better normalization solution. In the mean time
  // make it work for CJK by normalizing for Cap height in the same way
  // as is applied in compute_block_xheight when the row is presumed to
  // be ALLCAPS, i.e. the x-height is the fixed fraction
  // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
  bool is_median_sized =
      NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
      NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance);
  if (is_median_sized) {
    float target_height = bln_x_height *
        (textord_merge_x + textord_merge_asc) / textord_merge_x;
    // The scale factor is based on the width of the pre-rotation box.
    rotated_blob->scale(target_height / box.width());
    // Shift vertically so the bottom of the box sits at bln_baseline_offset.
    rotated_blob->move(FCOORD(0.0f,
        bln_baseline_offset - rotated_blob->bounding_box().bottom()));
  }

  TBLOB* result = make_tess_blob(rotated_blob, flatten);
  delete rotated_blob;
  return result;
}
/// Return a TBLOB * from the whole page_image. /// To be freed later with free_blob(). TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender) { BLOCK *block = new BLOCK ("a character", TRUE, 0, 0, 0, 0, page_image.get_xsize(), page_image.get_ysize()); // Create C_BLOBs from the page extract_edges( #ifndef GRAPHICS_DISABLED NULL, #endif &page_image, &page_image, ICOORD(page_image.get_xsize(), page_image.get_ysize()), block); // Create one PBLOB from all C_BLOBs C_BLOB_LIST *list = block->blob_list(); C_BLOB_IT c_blob_it(list); PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list for (c_blob_it.mark_cycle_pt(); !c_blob_it.cycled_list(); c_blob_it.forward()) { C_BLOB *c_blob = c_blob_it.data(); PBLOB c_as_p(c_blob, baseline + xheight); merge_blobs(pblob, &c_as_p); } PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word PBLOB_IT pblob_it(pblob_list); pblob_it.add_after_then_move(pblob); // Normalize PBLOB WERD word(pblob_list, 0, " "); ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender); word.baseline_normalise(row); delete row; // Create a TBLOB from PBLOB return make_tess_blob(pblob, /* flatten: */ TRUE); }
/**********************************************************************
 * make_tess_blobs
 *
 * Convert every blob in the list with make_tess_blob and chain the
 * results through their next pointers, in list order. Returns the first
 * converted blob, or NULL if the loop body never runs. The next pointer
 * of the final blob is left exactly as make_tess_blob produced it.
 **********************************************************************/
TBLOB *make_tess_blobs(                      //make tess blobs
                       PBLOB_LIST *bloblist  //list to convert
                      ) {
  TBLOB *first = NULL;                       //start of output chain
  TBLOB *last = NULL;                        //most recently added blob
  PBLOB_IT blob_it = bloblist;               //iterator over the input

  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    TBLOB *converted = make_tess_blob(blob_it.data(), TRUE);
    if (first == NULL)
      first = converted;                     //start the chain
    else
      last->next = converted;                //append to the chain
    last = converted;
  }
  return first;
}
void tess_training_tester( //call tess PBLOB *blob, //blob to match DENORM *denorm, //de-normaliser BOOL8 correct, //ly segmented char *text, //correct text INT32 count, //chars in text BLOB_CHOICE_LIST *ratings //list of results ) { TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row if (correct) { NormMethod = character; //Force char norm spc 30/11/93 tess_bn_matching = FALSE; //turn it off tess_cn_matching = FALSE; //convert blob tessblob = make_tess_blob (blob, TRUE); //make dummy row make_tess_row(denorm, &tessrow); //learn it LearnBlob(tessblob, &tessrow, text, count); free_blob(tessblob); } }
void tess_default_matcher( //call tess PBLOB *pblob, //previous blob PBLOB *blob, //blob to match PBLOB *nblob, //next blob WERD *word, //word it came from DENORM *denorm, //de-normaliser BLOB_CHOICE_LIST &ratings //list of results ) { LIST result; //tess output TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row tess_bn_matching = FALSE; //turn it off tess_cn_matching = FALSE; //convert blob tessblob = make_tess_blob (blob, TRUE); //make dummy row make_tess_row(denorm, &tessrow); //classify result = AdaptiveClassifier (tessblob, NULL, &tessrow); free_blob(tessblob); //make our format convert_choice_list(result, ratings); }