void make_real_words( tesseract::Textord *textord, TO_BLOCK *block, //block to do FCOORD rotation //for drawing ) { TO_ROW *row; //current row TO_ROW_IT row_it = block->get_rows (); ROW *real_row = NULL; //output row ROW_IT real_row_it = block->block->row_list (); if (row_it.empty ()) return; //empty block for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { row = row_it.data (); if (row->blob_list ()->empty () && !row->rep_words.empty ()) { real_row = make_rep_words (row, block); } else if (!row->blob_list()->empty()) { // In a fixed pitch document, some lines may be detected as fixed pitch // while others don't, and will go through different path. // For non-space delimited language like CJK, fixed pitch chop always // leave the entire line as one word. We can force consistent chopping // with force_make_prop_words flag. POLY_BLOCK* pb = block->block->poly_block(); if (textord_chopper_test) { real_row = textord->make_blob_words (row, rotation); } else if (textord_force_make_prop_words || (pb != NULL && !pb->IsText()) || row->pitch_decision == PITCH_DEF_PROP || row->pitch_decision == PITCH_CORR_PROP) { real_row = textord->make_prop_words (row, rotation); } else if (row->pitch_decision == PITCH_DEF_FIXED || row->pitch_decision == PITCH_CORR_FIXED) { real_row = fixed_pitch_words (row, rotation); } else { ASSERT_HOST(FALSE); } } if (real_row != NULL) { //put row in block real_row_it.add_after_then_move (real_row); } } block->block->set_stats (block->fixed_pitch == 0, (inT16) block->kern_size, (inT16) block->space_size, (inT16) block->fixed_pitch); block->block->check_pitch (); }
// Stores the page skew and debug level, then appends one newly allocated
// BaselineBlock to blocks_ for every TO_BLOCK in the given list.
//
// Non-text blocks are not skipped. On output they are supposed to contain a
// single empty word in each incoming text line, marking out the polygonal
// bounds of the block. Ideally they would need no baselines, but make_words
// currently crashes when no baseline and xheight are provided, so they are
// included here and flagged (non_text) for special treatment.
BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
                               TO_BLOCK_LIST* blocks)
    : page_skew_(page_skew), debug_level_(debug_level) {
  TO_BLOCK_IT block_it(blocks);
  block_it.mark_cycle_pt();
  while (!block_it.cycled_list()) {
    TO_BLOCK* current = block_it.data();
    POLY_BLOCK* outline = current->block->poly_block();

    // A block counts as non-text only when it has a polygon outline and that
    // outline reports that it is not text.
    bool non_text = false;
    if (outline != NULL) {
      non_text = !outline->IsText();
    }

    blocks_.push_back(new BaselineBlock(debug_level_, non_text, current));
    block_it.forward();
  }
}