// If we were supposed to return only a single textline, and there is more // than one, clean up and leave only the best. void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res) { if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) return; // No cleanup required. PAGE_RES_IT it(page_res); // Find the best row, being the greatest mean word conf. float row_total_conf = 0.0f; int row_word_count = 0; ROW_RES *best_row = NULL; float best_conf = 0.0f; for (it.restart_page(); it.word() != NULL; it.forward()) { WERD_RES *word = it.word(); row_total_conf += word->best_choice->certainty(); ++row_word_count; if (it.next_row() != it.row()) { row_total_conf /= row_word_count; if (best_row == NULL || best_conf < row_total_conf) { best_row = it.row(); best_conf = row_total_conf; } row_total_conf = 0.0f; row_word_count = 0; } } // Now eliminate any word not in the best row. for (it.restart_page(); it.word() != NULL; it.forward()) { if (it.row() != best_row) it.DeleteCurrentWord(); } }
/** * Segment the page according to the current value of tessedit_pageseg_mode. * pix_binary_ is used as the source image and should not be NULL. * On return the blocks list owns all the constructed page layout. */ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr) { ASSERT_HOST(pix_binary_ != NULL); int width = pixGetWidth(pix_binary_); int height = pixGetHeight(pix_binary_); // Get page segmentation mode. PageSegMode pageseg_mode = static_cast<PageSegMode>( static_cast<int>(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); if (lastdot != NULL) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block->set_right_to_left(right_to_left()); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_BLOCK. pageseg_mode = PSM_SINGLE_BLOCK; } // The diacritic_blobs holds noise blobs that may be diacritics. They // are separated out on areas of the image that seem noisy and short-circuit // the layout process, going straight from the initial partition creation // right through to after word segmentation, where they are added to the // rej_cblobs list of the most appropriate word. From there classification // will determine whether they are used. BLOBNBOX_LIST diacritic_blobs; int auto_page_seg_ret_val = 0; TO_BLOCK_LIST to_blocks; if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) { auto_page_seg_ret_val = AutoPageSeg( pageseg_mode, blocks, &to_blocks, enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr); if (pageseg_mode == PSM_OSD_ONLY) return auto_page_seg_ret_val; // To create blobs from the image region bounds uncomment this line: // to_blocks.clear(); // Uncomment to go back to the old mode. } else { deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); if (pageseg_mode == PSM_CIRCLE_WORD) { Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); if (pixcleaned != NULL) { pixDestroy(&pix_binary_); pix_binary_ = pixcleaned; } } } if (auto_page_seg_ret_val < 0) { return -1; } if (blocks->empty()) { if (textord_debug_tabfind) tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } bool splitting = pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT; bool cjk_mode = textord_use_cjk_fp_model; textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_, pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks); return auto_page_seg_ret_val; }
// Make the textlines and words inside each block. void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { page_tr_.set_x(width); page_tr_.set_y(height); if (to_blocks->empty()) { // AutoPageSeg was not used, so we need to find_components first. find_components(binary_pix, blocks, to_blocks); TO_BLOCK_IT it(to_blocks); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TO_BLOCK *to_block = it.data(); // Compute the edge offsets whether or not there is a grey_pix. // We have by-passed auto page seg, so we have to run it here. // By page segmentation mode there is no non-text to avoid running on. to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); } } else if (!PSM_SPARSE(pageseg_mode)) { // AutoPageSeg does not need to find_components as it did that already. // Filter_blobs sets up the TO_BLOCKs the same as find_components does. filter_blobs(page_tr_, to_blocks, true); } ASSERT_HOST(!to_blocks->empty()); if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { const FCOORD anticlockwise90(0.0f, 1.0f); const FCOORD clockwise90(0.0f, -1.0f); TO_BLOCK_IT it(to_blocks); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TO_BLOCK *to_block = it.data(); BLOCK *block = to_block->block; // Create a fake poly_block in block from its bounding box. block->set_poly_block(new POLY_BLOCK(block->bounding_box(), PT_VERTICAL_TEXT)); // Rotate the to_block along with its contained block and blobnbox lists. to_block->rotate(anticlockwise90); // Set the block's rotation values to obey the convention followed in // layout analysis for vertical text. block->set_re_rotation(clockwise90); block->set_classify_rotation(clockwise90); } } TO_BLOCK_IT to_block_it(to_blocks); TO_BLOCK *to_block = to_block_it.data(); // Make the rows in the block. float gradient = 0; // Do it the old fashioned way. if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { gradient = make_rows(page_tr_, to_blocks); } else if (!PSM_SPARSE(pageseg_mode)) { // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks); } BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks); baseline_detector.ComputeStraightBaselines(use_box_bottoms); baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true, textord_heavy_nr, textord_show_final_rows, this); // Now make the words in the lines. if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { // SINGLE_LINE uses the old word maker on the single line. make_words(this, page_tr_, gradient, blocks, to_blocks); } else { // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a // single word, and in SINGLE_CHAR mode, all the outlines // go in a single blob. TO_BLOCK *to_block = to_block_it.data(); make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(), to_block->block->row_list()); } cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks); // Remove empties. // Compute the margins for each row in the block, to be used later for // paragraph detection. BLOCK_IT b_it(blocks); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { b_it.data()->compute_row_margins(); } #ifndef GRAPHICS_DISABLED close_to_win(); #endif }
/** * Segment the page according to the current value of tessedit_pageseg_mode. * pix_binary_ is used as the source image and should not be NULL. * On return the blocks list owns all the constructed page layout. */ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr) { ASSERT_HOST(pix_binary_ != NULL); int width = pixGetWidth(pix_binary_); int height = pixGetHeight(pix_binary_); // Get page segmentation mode. PageSegMode pageseg_mode = static_cast<PageSegMode>( static_cast<int>(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); if (lastdot != NULL) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block->set_right_to_left(right_to_left()); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_BLOCK. pageseg_mode = PSM_SINGLE_BLOCK; } int auto_page_seg_ret_val = 0; TO_BLOCK_LIST to_blocks; if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) { auto_page_seg_ret_val = AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr); if (pageseg_mode == PSM_OSD_ONLY) return auto_page_seg_ret_val; // To create blobs from the image region bounds uncomment this line: // to_blocks.clear(); // Uncomment to go back to the old mode. } else { deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); if (pageseg_mode == PSM_CIRCLE_WORD) { Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); if (pixcleaned != NULL) { pixDestroy(&pix_binary_); pix_binary_ = pixcleaned; } } } if (auto_page_seg_ret_val < 0) { return -1; } if (blocks->empty()) { if (textord_debug_tabfind) tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } textord_.TextordPage(pageseg_mode, width, height, pix_binary_, blocks, &to_blocks); return auto_page_seg_ret_val; }