Beispiel #1
0
/**
 * make_real_words
 *
 * Convert every TO_ROW of a TO_BLOCK into an output ROW and append it to
 * the row list of the underlying BLOCK, then record the block's pitch and
 * spacing statistics.
 *
 * Each row is routed to one word maker:
 *  - no blobs but some repeated-char words -> make_rep_words
 *  - blobs present, textord_chopper_test set -> Textord::make_blob_words
 *  - blobs present and proportional handling is forced, the block has a
 *    non-text poly block, or the pitch decision is proportional
 *    -> Textord::make_prop_words
 *  - blobs present and the pitch decision is fixed -> fixed_pitch_words
 *  - blobs present with any other pitch decision -> assertion failure
 * A row with neither blobs nor repeated-char words produces no output row.
 *
 * @param textord   Textord instance providing the word makers.
 * @param block     Block to process; its BLOCK receives the new rows.
 * @param rotation  Rotation handed through to the word makers.
 *
 * If the block has no rows at all the function returns immediately and the
 * block statistics are left untouched.
 */
void make_real_words(
                     tesseract::Textord *textord,
                     TO_BLOCK *block,  //block to do
                     FCOORD rotation   //for drawing
                    ) {
  TO_ROW_IT row_it = block->get_rows ();
  ROW_IT real_row_it = block->block->row_list ();

  if (row_it.empty ())
    return;                      //empty block
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    TO_ROW *row = row_it.data ();  //current row
    // The output row must start out empty for every input row. Otherwise a
    // row that has neither blobs nor repeated words would leave the previous
    // row's pointer in place and that ROW would be linked into the list twice.
    ROW *real_row = NULL;          //output row

    if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
      real_row = make_rep_words (row, block);
    } else if (!row->blob_list()->empty()) {
      // In a fixed pitch document, some lines may be detected as fixed pitch
      // while others are not, and would go through a different path.
      // For non-space delimited languages like CJK, fixed pitch chopping
      // always leaves the entire line as one word.  Consistent chopping can
      // be forced with the textord_force_make_prop_words flag.
      POLY_BLOCK* pb = block->block->poly_block();
      if (textord_chopper_test) {
        real_row = textord->make_blob_words (row, rotation);
      } else if (textord_force_make_prop_words ||
                 (pb != NULL && !pb->IsText()) ||
                 row->pitch_decision == PITCH_DEF_PROP ||
                 row->pitch_decision == PITCH_CORR_PROP) {
        real_row = textord->make_prop_words (row, rotation);
      } else if (row->pitch_decision == PITCH_DEF_FIXED ||
                 row->pitch_decision == PITCH_CORR_FIXED) {
        real_row = fixed_pitch_words (row, rotation);
      } else {
        // Every row with blobs must carry one of the four pitch decisions.
        ASSERT_HOST(FALSE);
      }
    }

    if (real_row != NULL) {
                                 //put row in block
      real_row_it.add_after_then_move (real_row);
    }
  }

  // First argument is true when no fixed pitch was found for the block.
  block->block->set_stats (block->fixed_pitch == 0, (inT16) block->kern_size,
    (inT16) block->space_size,
    (inT16) block->fixed_pitch);
  block->block->check_pitch ();
}
Beispiel #2
0
// Builds one BaselineBlock per TO_BLOCK in |blocks| and stores it in
// blocks_, remembering the page skew and debug level for later use.
//
// Non-text blocks are kept rather than skipped. On output they are meant to
// hold a single empty word per incoming text line, marking the polygonal
// bounds of the block. Ideally they would need no baselines, but make_words
// currently crashes when a baseline and xheight are missing, so they are
// included here and flagged for special treatment.
BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
                               TO_BLOCK_LIST* blocks)
    : page_skew_(page_skew), debug_level_(debug_level) {
  TO_BLOCK_IT block_it(blocks);
  block_it.mark_cycle_pt();
  while (!block_it.cycled_list()) {
    TO_BLOCK* current = block_it.data();
    POLY_BLOCK* outline = current->block->poly_block();

    // A block is non-text only if it has a poly block that says so.
    bool is_non_text = false;
    if (outline != NULL) {
      is_non_text = !outline->IsText();
    }

    BaselineBlock* wrapped =
        new BaselineBlock(debug_level_, is_non_text, current);
    blocks_.push_back(wrapped);

    block_it.forward();
  }
}