/********************************************************************** * run_cube_combiner * * Iterates through tesseract's results and calls cube on each word, * combining the results with the existing tesseract result. **********************************************************************/ void Tesseract::run_cube_combiner(PAGE_RES *page_res) { if (page_res == NULL || tess_cube_combiner_ == NULL) return; PAGE_RES_IT page_res_it(page_res); // Iterate through the word results and call cube on each word. for (page_res_it.restart_page(); page_res_it.word () != NULL; page_res_it.forward()) { BLOCK* block = page_res_it.block()->block; if (block->poly_block() != NULL && !block->poly_block()->IsText()) continue; // Don't deal with non-text blocks. WERD_RES* word = page_res_it.word(); // Skip cube entirely if tesseract's certainty is greater than threshold. int combiner_run_thresh = convert_prob_to_tess_certainty( cube_cntxt_->Params()->CombinerRunThresh()); if (word->best_choice->certainty() >= combiner_run_thresh) { continue; } // Use the same language as Tesseract used for the word. Tesseract* lang_tess = word->tesseract; // Setup a trial WERD_RES in which to classify with cube. WERD_RES cube_word; cube_word.InitForRetryRecognition(*word); cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(), OEM_CUBE_ONLY, NULL, false, false, false, page_res_it.row()->row, page_res_it.block()->block); CubeObject *cube_obj = lang_tess->cube_recognize_word( page_res_it.block()->block, &cube_word); if (cube_obj != NULL) lang_tess->cube_combine_word(cube_obj, &cube_word, word); delete cube_obj; } }
/// Tests the chopper by exhaustively running chop_one_blob. /// The word_res will contain filled chopped_word, seam_array, denorm, /// box_word and best_state for the maximally chopped word. void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes, BLOCK* block, ROW* row, WERD_RES* word_res) { if (!word_res->SetupForRecognition(unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL, classify_bln_numeric_mode, textord_use_cjk_fp_model, poly_allow_detailed_fx, row, block)) { word_res->CloneChoppedToRebuild(); return; } if (chop_debug) { tprintf("Maximally chopping word at:"); word_res->word->bounding_box().print(); } GenericVector<BLOB_CHOICE*> blob_choices; ASSERT_HOST(!word_res->chopped_word->blobs.empty()); float rating = static_cast<float>(MAX_INT8); for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) { // The rating and certainty are not quite arbitrary. Since // select_blob_to_chop uses the worst certainty to choose, they all have // to be different, so starting with MAX_INT8, subtract 1/8 for each blob // in here, and then divide by e each time they are chopped, which // should guarantee a set of unequal values for the whole tree of blobs // produced, however much chopping is required. The chops are thus only // limited by the ability of the chopper to find suitable chop points, // and not by the value of the certainties. BLOB_CHOICE* choice = new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE); blob_choices.push_back(choice); rating -= 0.125f; } const double e = exp(1.0); // The base of natural logs. int blob_number; int right_chop_index = 0; if (!assume_fixed_pitch_char_segment) { // We only chop if the language is not fixed pitch like CJK. SEAM* seam = NULL; while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != NULL) { word_res->InsertSeam(blob_number, seam); BLOB_CHOICE* left_choice = blob_choices[blob_number]; rating = left_choice->rating() / e; left_choice->set_rating(rating); left_choice->set_certainty(-rating); // combine confidence w/ serial # BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index, rating - 0.125f, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE); blob_choices.insert(right_choice, blob_number + 1); } } word_res->CloneChoppedToRebuild(); word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]); }
// Helper gets the image of a rectangle, using the block.re_rotation() if // needed to get to the image, and rotating the result back to horizontal // layout. (CJK characters will be on their left sides) The vertical text flag // is set in the returned ImageData if the text was originally vertical, which // can be used to invoke a different CJK recognition engine. The revised_box // is also returned to enable calculation of output bounding boxes. ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block, int padding, TBOX* revised_box) const { TBOX wbox = box; wbox.pad(padding, padding); *revised_box = wbox; // Number of clockwise 90 degree rotations needed to get back to tesseract // coords from the clipped image. int num_rotations = 0; if (block.re_rotation().y() > 0.0f) num_rotations = 1; else if (block.re_rotation().x() < 0.0f) num_rotations = 2; else if (block.re_rotation().y() < 0.0f) num_rotations = 3; // Handle two cases automatically: 1 the box came from the block, 2 the box // came from a box file, and refers to the image, which the block may not. if (block.bounding_box().major_overlap(*revised_box)) revised_box->rotate(block.re_rotation()); // Now revised_box always refers to the image. // BestPix is never colormapped, but may be of any depth. Pix* pix = BestPix(); int width = pixGetWidth(pix); int height = pixGetHeight(pix); TBOX image_box(0, 0, width, height); // Clip to image bounds; *revised_box &= image_box; if (revised_box->null_box()) return NULL; Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(), revised_box->height()); Pix* box_pix = pixClipRectangle(pix, clip_box, NULL); if (box_pix == NULL) return NULL; boxDestroy(&clip_box); if (num_rotations > 0) { Pix* rot_pix = pixRotateOrth(box_pix, num_rotations); pixDestroy(&box_pix); box_pix = rot_pix; } // Convert sub-8-bit images to 8 bit. int depth = pixGetDepth(box_pix); if (depth < 8) { Pix* grey; grey = pixConvertTo8(box_pix, false); pixDestroy(&box_pix); box_pix = grey; } bool vertical_text = false; if (num_rotations > 0) { // Rotated the clipped revised box back to internal coordinates. FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y()); revised_box->rotate(rotation); if (num_rotations != 2) vertical_text = true; } return new ImageData(vertical_text, box_pix); }
/** * word_bln_display() * * Normalize word and display in word window */ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) { TWERD *bln_word = word_res->chopped_word; if (bln_word == NULL) { word_res->SetupForTessRecognition(unicharset, this, BestPix(), false, this->textord_use_cjk_fp_model, row, block); bln_word = word_res->chopped_word; } bln_word_window_handle()->Clear(); display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, 1.0, 0.0f, -1000.0f, 1000.0f); bln_word->plot(bln_word_window_handle()); bln_word_window_handle()->Update(); return TRUE; }
// page_res is non-const because the iterator doesn't know if you are going // to change the items it points to! Really a const here though. void Tesseract::blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box) { ROW* row; // row of word BLOCK* block; // block of word WERD* word = make_pseudo_word(page_res, selection_box, block, row); if (word != NULL) { WERD_RES word_res(word); word_res.x_height = row->x_height(); word_res.SetupForRecognition(unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL, classify_bln_numeric_mode, textord_use_cjk_fp_model, poly_allow_detailed_fx, row, block); TWERD* bln_word = word_res.chopped_word; TBLOB* bln_blob = bln_word->blobs[0]; INT_FX_RESULT_STRUCT fx_info; GenericVector<INT_FEATURE_STRUCT> bl_features; GenericVector<INT_FEATURE_STRUCT> cn_features; Classify::ExtractFeatures(*bln_blob, classify_nonlinear_norm, &bl_features, &cn_features, &fx_info, NULL); // Display baseline features. ScrollView* bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0); ClearFeatureSpaceWindow(baseline, bl_win); for (int f = 0; f < bl_features.size(); ++f) RenderIntFeature(bl_win, &bl_features[f], ScrollView::GREEN); bl_win->Update(); // Display cn features. ScrollView* cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0); ClearFeatureSpaceWindow(character, cn_win); for (int f = 0; f < cn_features.size(); ++f) RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN); cn_win->Update(); delete word; } }
/** * word_bln_display() * * Normalize word and display in word window */ BOOL8 Tesseract::word_bln_display(PAGE_RES_IT* pr_it) { WERD_RES* word_res = pr_it->word(); if (word_res->chopped_word == NULL) { // Setup word normalization parameters. word_res->SetupForRecognition(unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL, classify_bln_numeric_mode, textord_use_cjk_fp_model, poly_allow_detailed_fx, pr_it->row()->row, pr_it->block()->block); } bln_word_window_handle()->Clear(); display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, 1.0, 0.0f, -1000.0f, 1000.0f); C_BLOB_IT it(word_res->word->cblob_list()); ScrollView::Color color = WERD::NextColor(ScrollView::BLACK); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->plot_normed(word_res->denorm, color, ScrollView::BROWN, bln_word_window_handle()); color = WERD::NextColor(color); } bln_word_window_handle()->Update(); return TRUE; }