Пример #1
0
void draw_meanlines(                  //draw a block
        TO_BLOCK *block,  //block to draw
        float gradient,   //gradients of lines
        inT32 left,       //edge of block
        ScrollView::Color colour,    //colour to draw in
        FCOORD rotation   //rotation for line
) {
    FCOORD plot_pt;                //point to plot
    //rows
    TO_ROW_IT row_it = block->get_rows();
    TO_ROW *row;                   //current row
    BLOBNBOX_IT blob_it;           //blobs
    float right;                   //end of row
    to_win->Pen(colour);
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        row = row_it.data();
        blob_it.set_to_list(row->blob_list());
        blob_it.move_to_last();
        right = blob_it.data()->bounding_box().right();
        plot_pt =
                FCOORD((float) left,
                       gradient * left + row->parallel_c() + row->xheight);
        plot_pt.rotate(rotation);
        to_win->SetCursor(plot_pt.x(), plot_pt.y());
        plot_pt =
                FCOORD((float) right,
                       gradient * right + row->parallel_c() + row->xheight);
        plot_pt.rotate(rotation);
        to_win->DrawTo(plot_pt.x(), plot_pt.y());
    }
}
Пример #2
0
// Fits splines to the textlines, or creates fake QSPLINES from the straight
// baselines that are already on the TO_ROWs.
// As a side-effect, computes the xheights of the rows and the block.
// Although x-height estimation is conceptually separate, it is part of
// detecting perspective distortion and therefore baseline fitting.
void BaselineBlock::FitBaselineSplines(bool enable_splines,
                                       bool show_final_rows,
                                       Textord* textord) {
  double gradient = tan(skew_angle_);
  FCOORD rotation(1.0f, 0.0f);

  if (enable_splines) {
    textord->make_spline_rows(block_, gradient, show_final_rows);
  } else {
    // Make a fake spline from the existing line.
    TBOX block_box= block_->block->bounding_box();
    TO_ROW_IT row_it = block_->get_rows();
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      TO_ROW* row = row_it.data();
      inT32 xstarts[2] = { block_box.left(), block_box.right() };
      double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
      row->baseline = QSPLINE(1, xstarts, coeffs);
      textord->compute_row_xheight(row, block_->block->classify_rotation(),
                                   row->line_m(), block_->line_size);
    }
  }
  textord->compute_block_xheight(block_, gradient);
  block_->block->set_xheight(block_->xheight);
  if (textord_restore_underlines)  // fix underlines
    restore_underlined_blobs(block_);
}
Пример #3
0
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
  TO_ROW_IT to_row_it(rows);
  TO_ROW* row = to_row_it.data();
  // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
  // to create the word.
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it(&cblobs);
  BLOBNBOX_IT box_it(row->blob_list());
  for (;!box_it.empty(); box_it.forward()) {
    BLOBNBOX* bblob= box_it.extract();
    if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
      if (bblob->cblob() != NULL) {
        C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
        cout_it.move_to_last();
        cout_it.add_list_after(bblob->cblob()->out_list());
        delete bblob->cblob();
      }
    } else {
      if (bblob->cblob() != NULL)
        cblob_it.add_after_then_move(bblob->cblob());
      delete bblob;
    }
  }
  // Convert the TO_ROW to a ROW.
  ROW* real_row = new ROW(row, static_cast<inT16>(row->kern_size),
                          static_cast<inT16>(row->space_size));
  WERD_IT word_it(real_row->word_list());
  WERD* word = new WERD(&cblobs, 0, NULL);
  word->set_flag(W_BOL, TRUE);
  word->set_flag(W_EOL, TRUE);
  word_it.add_after_then_move(word);
  ROW_IT row_it(real_rows);
  row_it.add_after_then_move(real_row);
}
Пример #4
0
//yangjing01 modified : 
bool TAL_make_single_word(bool one_blob, TO_ROW_LIST* rows, ROW_LIST* real_rows)
{
  TO_ROW_IT to_row_it(rows);
  ROW_IT row_it(real_rows);
  //to_real_row is the real row information of single row or single char mode
  TO_ROW* real_to_row = NULL;
  float row_max_height = 0.0;
  for (to_row_it.mark_cycle_pt();
    !to_row_it.cycled_list(); to_row_it.forward()){
    TO_ROW* row = to_row_it.data();
    float row_min_y = row->min_y();
    float row_max_y = row->max_y();
    float row_height = abs(row_max_y - row_min_y);
    if (real_to_row == NULL
      || row_height > row_max_height
      || fabs(row_height - row_max_height) < 1.0f){
      row_max_height = row_height;
      real_to_row = row;
    }
  }

  if (real_to_row == NULL){
    return false;
  }

  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it(&cblobs);
  BLOBNBOX_IT box_it(real_to_row->blob_list());
  for (; !box_it.empty(); box_it.forward()){
    BLOBNBOX* bblob = box_it.extract();
    if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
      if (bblob->cblob() != NULL){
        C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
        cout_it.move_to_last();
        cout_it.add_list_after(bblob->cblob()->out_list());
        delete bblob->cblob();
      }
    }
    else {
      if (bblob->cblob() != NULL)
        cblob_it.add_after_then_move(bblob->cblob());
    }
    delete bblob;
  }
  // Convert the TO_ROW to a ROW.
  ROW* real_row = new ROW(real_to_row, static_cast<inT16>(real_to_row->kern_size),
    static_cast<inT16>(real_to_row->space_size));
  WERD_IT word_it(real_row->word_list());
  WERD* word = new WERD(&cblobs, 0, NULL);
  word->set_flag(W_BOL, TRUE);
  word->set_flag(W_EOL, TRUE);
  word->set_flag(W_DONT_CHOP, one_blob);
  word_it.add_after_then_move(word);
  row_it.add_after_then_move(real_row);

  return true;
}
Пример #5
0
void make_real_words(
                     tesseract::Textord *textord,
                     TO_BLOCK *block,  //block to do
                     FCOORD rotation   //for drawing
                    ) {
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();
  ROW *real_row = NULL;          //output row
  ROW_IT real_row_it = block->block->row_list ();

  if (row_it.empty ())
    return;                      //empty block
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
      real_row = make_rep_words (row, block);
    } else if (!row->blob_list()->empty()) {
      // In a fixed pitch document, some lines may be detected as fixed pitch
      // while others don't, and will go through different path.
      // For non-space delimited language like CJK, fixed pitch chop always
      // leave the entire line as one word.  We can force consistent chopping
      // with force_make_prop_words flag.
      POLY_BLOCK* pb = block->block->poly_block();
      if (textord_chopper_test) {
        real_row = textord->make_blob_words (row, rotation);
      } else if (textord_force_make_prop_words ||
                 (pb != NULL && !pb->IsText()) ||
                 row->pitch_decision == PITCH_DEF_PROP ||
                 row->pitch_decision == PITCH_CORR_PROP) {
        real_row = textord->make_prop_words (row, rotation);
      } else if (row->pitch_decision == PITCH_DEF_FIXED ||
                 row->pitch_decision == PITCH_CORR_FIXED) {
        real_row = fixed_pitch_words (row, rotation);
      } else {
        ASSERT_HOST(FALSE);
      }
    }

    if (real_row != NULL) {
                                 //put row in block
      real_row_it.add_after_then_move (real_row);
    }
  }

  block->block->set_stats (block->fixed_pitch == 0, (inT16) block->kern_size,
    (inT16) block->space_size,
    (inT16) block->fixed_pitch);
  block->block->check_pitch ();
}
Пример #6
0
void set_row_spaces(                  //find space sizes
                    TO_BLOCK *block,  //block to do
                    FCOORD rotation,  //for drawing
                    BOOL8 testing_on  //correct orientation
                   ) {
  inT32 maxwidth;                //of widest space
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();

  if (row_it.empty ())
    return;                      //empty block
  maxwidth = (inT32) ceil (block->xheight * textord_words_maxspace);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (row->fixed_pitch == 0) {
      //                      if (!textord_test_mode
      //                      && row_words(block,row,maxwidth,rotation,testing_on)==0
      //                      || textord_test_mode
      //                      && row_words2(block,row,maxwidth,rotation,testing_on)==0)
      //                      {
      row->min_space =
        (inT32) ceil (row->pr_space -
        (row->pr_space -
        row->pr_nonsp) * textord_words_definite_spread);
      row->max_nonspace =
        (inT32) floor (row->pr_nonsp +
        (row->pr_space -
        row->pr_nonsp) * textord_words_definite_spread);
      if (testing_on && textord_show_initial_words) {
        tprintf ("Assigning defaults %d non, %d space to row at %g\n",
          row->max_nonspace, row->min_space, row->intercept ());
      }
      row->space_threshold = (row->max_nonspace + row->min_space) / 2;
      row->space_size = row->pr_space;
      row->kern_size = row->pr_nonsp;
      //                      }
    }
#ifndef GRAPHICS_DISABLED
    if (textord_show_initial_words && testing_on) {
      plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
    }
#endif
  }
}
Пример #7
0
void set_row_spaces(                  //find space sizes
        TO_BLOCK* block,  //block to do
        FCOORD rotation,  //for drawing
        bool testing_on  //correct orientation
) {
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();

  if (row_it.empty ())
    return;                      //empty block
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (row->fixed_pitch == 0) {
      row->min_space =
        static_cast<int32_t>(ceil (row->pr_space -
        (row->pr_space -
        row->pr_nonsp) * textord_words_definite_spread));
      row->max_nonspace =
        static_cast<int32_t>(floor (row->pr_nonsp +
        (row->pr_space -
        row->pr_nonsp) * textord_words_definite_spread));
      if (testing_on && textord_show_initial_words) {
        tprintf ("Assigning defaults %d non, %d space to row at %g\n",
          row->max_nonspace, row->min_space, row->intercept ());
      }
      row->space_threshold = (row->max_nonspace + row->min_space) / 2;
      row->space_size = row->pr_space;
      row->kern_size = row->pr_nonsp;
    }
#ifndef GRAPHICS_DISABLED
    if (textord_show_initial_words && testing_on) {
      plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row);
    }
#endif
  }
}
Пример #8
0
void restore_underlined_blobs(                 //get chop points
                              TO_BLOCK *block  //block to do
                             ) {
  int16_t chop_coord;              //chop boundary
  TBOX blob_box;                  //of underline
  BLOBNBOX *u_line;              //underline bit
  TO_ROW *row;                   //best row for blob
  ICOORDELT_LIST chop_cells;     //blobs to cut out
                                 //real underlines
  BLOBNBOX_LIST residual_underlines;
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  ICOORDELT_IT cell_it = &chop_cells;
                                 //under lines
  BLOBNBOX_IT under_it = &block->underlines;
  BLOBNBOX_IT ru_it = &residual_underlines;

  if (block->get_rows()->empty())
    return;  // Don't crash if there are no rows.
  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
  under_it.forward ()) {
    u_line = under_it.extract ();
    blob_box = u_line->bounding_box ();
    row = most_overlapping_row (block->get_rows (), u_line);
    if (row == nullptr)
      return;  // Don't crash if there is no row.
    find_underlined_blobs (u_line, &row->baseline, row->xheight,
      row->xheight * textord_underline_offset,
      &chop_cells);
    cell_it.set_to_list (&chop_cells);
    for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
    cell_it.forward ()) {
      chop_coord = cell_it.data ()->x ();
      if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
        split_to_blob (u_line, chop_coord,
          textord_fp_chop_error + 0.5,
          &left_coutlines,
          &right_coutlines);
        if (!left_coutlines.empty()) {
          ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        }
        chop_coord = cell_it.data ()->y ();
        split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5,
                      &left_coutlines, &right_coutlines);
        if (!left_coutlines.empty()) {
          row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
        }
        u_line = nullptr;           //no more blobs to add
      }
      delete cell_it.extract();
    }
    if (!right_coutlines.empty ()) {
      split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5,
                    &left_coutlines, &right_coutlines);
      if (!left_coutlines.empty())
        ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
    }
    if (u_line != nullptr) {
      delete u_line->cblob();
      delete u_line;
    }
  }
  if (!ru_it.empty()) {
    ru_it.move_to_first();
    for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
      under_it.add_after_then_move(ru_it.extract());
    }
  }
}
Пример #9
0
GAPMAP::GAPMAP(                 //Constructor
               TO_BLOCK *block  //block
              ) {
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  BLOBNBOX_IT blob_it;           //iterator
  TBOX blob_box;
  TBOX prev_blob_box;
  inT16 gap_width;
  inT16 start_of_row;
  inT16 end_of_row;
  STATS xht_stats (0, 128);
  inT16 min_quantum;
  inT16 max_quantum;
  inT16 i;

  row_it.set_to_list (block->get_rows ());
  /*
    Find left and right extremes and bucket size
  */
  map = NULL;
  min_left = MAX_INT16;
  max_right = -MAX_INT16;
  total_rows = 0;
  any_tabs = FALSE;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      total_rows++;
      xht_stats.add ((inT16) floor (row->xheight + 0.5), 1);
      blob_it.set_to_list (row->blob_list ());
      start_of_row = blob_it.data ()->bounding_box ().left ();
      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
      if (min_left > start_of_row)
        min_left = start_of_row;
      if (max_right < end_of_row)
        max_right = end_of_row;
    }
  }
  if ((total_rows < 3) || (min_left >= max_right)) {
    total_rows = 0;
    min_left = max_right = 0;
    return;
  }
  bucket_size = (inT16) floor (xht_stats.median () + 0.5) / 2;
  map_max = (max_right - min_left) / bucket_size;
  map = (inT16 *) alloc_mem ((map_max + 1) * sizeof (inT16));
  for (i = 0; i <= map_max; i++)
    map[i] = 0;

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      blob_it.set_to_list (row->blob_list ());
      blob_it.mark_cycle_pt ();
      blob_box = box_next (&blob_it);
      prev_blob_box = blob_box;
      if (gapmap_use_ends) {
        /* Leading space */
        gap_width = blob_box.left () - min_left;
        if ((gap_width > gapmap_big_gaps * row->xheight)
        && gap_width > 2) {
          max_quantum = (blob_box.left () - min_left) / bucket_size;
          for (i = 0; i <= max_quantum; i++)
            map[i]++;
        }
      }
      while (!blob_it.cycled_list ()) {
        blob_box = box_next (&blob_it);
        gap_width = blob_box.left () - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
        && gap_width > 2) {
          min_quantum =
            (prev_blob_box.right () - min_left) / bucket_size;
          max_quantum = (blob_box.left () - min_left) / bucket_size;
          for (i = min_quantum; i <= max_quantum; i++)
            map[i]++;
        }
        prev_blob_box = blob_box;
      }
      if (gapmap_use_ends) {
        /* Trailing space */
        gap_width = max_right - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
        && gap_width > 2) {
          min_quantum =
            (prev_blob_box.right () - min_left) / bucket_size;
          for (i = min_quantum; i <= map_max; i++)
            map[i]++;
        }
      }
    }
  }
  for (i = 0; i <= map_max; i++) {
    if (map[i] > total_rows / 2) {
      if (gapmap_no_isolated_quanta &&
        (((i == 0) &&
        (map[i + 1] <= total_rows / 2)) ||
        ((i == map_max) &&
        (map[i - 1] <= total_rows / 2)) ||
        ((i > 0) &&
        (i < map_max) &&
        (map[i - 1] <= total_rows / 2) &&
      (map[i + 1] <= total_rows / 2)))) {
        map[i] = 0;              //prevent isolated quantum
      }
      else
        any_tabs = TRUE;
    }
  }
  if (gapmap_debug && any_tabs)
    tprintf ("Table found\n");
}