Example #1
0
// Inserts all the blobs from the given list, with x and y spreading,
// without removing from the source list, so ownership remains with the
// source list.
    void BlobGrid::InsertBlobList(BLOBNBOX_LIST * blobs) {
        // Walk the caller's list in place; nothing is extracted from it, so
        // the list keeps ownership of every blob.
        BLOBNBOX_IT it(blobs);
        it.mark_cycle_pt();
        while (!it.cycled_list()) {
            BLOBNBOX *current = it.data();
            // Blobs flagged as joined to their predecessor are skipped; all
            // others are inserted with both the x and y spreading flags set.
            if (!current->joined_to_prev()) {
                InsertBBox(true, true, current);
            }
            it.forward();
        }
    }
Example #2
0
//yangjing01 modified : 
bool TAL_make_single_word(bool one_blob, TO_ROW_LIST* rows, ROW_LIST* real_rows)
{
  TO_ROW_IT to_row_it(rows);
  ROW_IT row_it(real_rows);
  //to_real_row is the real row information of single row or single char mode
  TO_ROW* real_to_row = NULL;
  float row_max_height = 0.0;
  for (to_row_it.mark_cycle_pt();
    !to_row_it.cycled_list(); to_row_it.forward()){
    TO_ROW* row = to_row_it.data();
    float row_min_y = row->min_y();
    float row_max_y = row->max_y();
    float row_height = abs(row_max_y - row_min_y);
    if (real_to_row == NULL
      || row_height > row_max_height
      || fabs(row_height - row_max_height) < 1.0f){
      row_max_height = row_height;
      real_to_row = row;
    }
  }

  if (real_to_row == NULL){
    return false;
  }

  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it(&cblobs);
  BLOBNBOX_IT box_it(real_to_row->blob_list());
  for (; !box_it.empty(); box_it.forward()){
    BLOBNBOX* bblob = box_it.extract();
    if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
      if (bblob->cblob() != NULL){
        C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
        cout_it.move_to_last();
        cout_it.add_list_after(bblob->cblob()->out_list());
        delete bblob->cblob();
      }
    }
    else {
      if (bblob->cblob() != NULL)
        cblob_it.add_after_then_move(bblob->cblob());
    }
    delete bblob;
  }
  // Convert the TO_ROW to a ROW.
  ROW* real_row = new ROW(real_to_row, static_cast<inT16>(real_to_row->kern_size),
    static_cast<inT16>(real_to_row->space_size));
  WERD_IT word_it(real_row->word_list());
  WERD* word = new WERD(&cblobs, 0, NULL);
  word->set_flag(W_BOL, TRUE);
  word->set_flag(W_EOL, TRUE);
  word->set_flag(W_DONT_CHOP, one_blob);
  word_it.add_after_then_move(word);
  row_it.add_after_then_move(real_row);

  return true;
}
Example #3
0
// Extracts `blobcount` BLOBNBOXes from `box_it` and assembles them into a new
// WERD. A box flagged joined_to_prev has its outlines appended to the blob
// most recently added; any other box contributes a new blob. Polygonal blobs
// (blob()) take precedence over chain-coded blobs (cblob()). Each extracted
// BLOBNBOX is deleted. The returned WERD is allocated with new.
WERD *make_real_word(BLOBNBOX_IT *box_it,  //iterator
                     inT32 blobcount,      //no of blobs to use
                     BOOL8 bol,            //start of line
                     uinT8 blanks          //no of blanks
                    ) {
  PBLOB_LIST poly_blobs;                   // polygonal blobs in word
  C_BLOB_LIST chain_blobs;                 // chain-coded blobs in word
  PBLOB_IT poly_it = &poly_blobs;
  C_BLOB_IT chain_it = &chain_blobs;
  OUTLINE_IT poly_outline_it;              // used when merging outlines
  C_OUTLINE_IT chain_outline_it;

  for (inT32 taken = 0; taken < blobcount; ++taken) {
    BLOBNBOX *current = box_it->extract();
    const bool continues_prev = current->joined_to_prev();
    PBLOB *poly = current->blob();

    if (poly != NULL) {
      if (continues_prev) {
        // Move the outlines onto the end of the last polygonal blob.
        poly_outline_it.set_to_list(poly_it.data()->out_list());
        poly_outline_it.move_to_last();
        poly_outline_it.add_list_after(poly->out_list());
        delete poly;
      } else {
        poly_it.add_after_then_move(poly);
      }
    } else {
      C_BLOB *chain = current->cblob();
      if (chain != NULL) {
        if (continues_prev) {
          // Move the outlines onto the end of the last chain-coded blob.
          chain_outline_it.set_to_list(chain_it.data()->out_list());
          chain_outline_it.move_to_last();
          chain_outline_it.add_list_after(chain->out_list());
          delete chain;
        } else {
          chain_it.add_after_then_move(chain);
        }
      }
    }

    delete current;
    box_it->forward();          // step to the next box
  }

  // A word is always given at least one blank.
  if (blanks == 0)
    blanks = 1;

  // Use the chain-coded list only when no polygonal blobs were collected.
  WERD *word = poly_it.empty() ? new WERD(&chain_blobs, blanks, NULL)
                               : new WERD(&poly_blobs, blanks, NULL);

  if (bol)
    word->set_flag(W_BOL, TRUE);
  // The iterator being back at the first element marks the end of the line.
  if (box_it->at_first())
    word->set_flag(W_EOL, TRUE);

  return word;
}
Example #4
0
// Returns the bounding box of the blob at the iterator's current position,
// then advances the iterator past it and past every following blob that is
// flagged as joined to its predecessor.
TBOX box_next_pre_chopped(                 //get bounding box
                         BLOBNBOX_IT *it  //iterator to blobs
                        ) {
  // Only the starting blob contributes to the returned box.
  TBOX first_box = it->data()->bounding_box();

  // Always step at least once; keep stepping while on a joined blob.
  for (;;) {
    it->forward();
    if (!it->data()->joined_to_prev())
      break;
  }
  return first_box;
}
Example #5
0
// Returns the bounding box of the blob at the iterator's current position,
// extended by the boxes of any following blobs that have no cblob, and
// advances the iterator to the next blob that has a cblob and is not joined
// to its predecessor.
TBOX box_next(                 //get bounding box
             BLOBNBOX_IT *it  //iterator to blobs
            ) {
  TBOX merged = it->data()->bounding_box();

  for (;;) {
    it->forward();
    BLOBNBOX *candidate = it->data();
    const bool pre_chopped = (candidate->cblob() == NULL);
    if (pre_chopped) {
      // A blob without a cblob was pre-chopped: fold its box in.
      merged += candidate->bounding_box();
    }
    // Stop on the next real blob: it has a cblob and starts a new blob.
    if (!pre_chopped && !candidate->joined_to_prev())
      break;
  }
  return merged;
}
Example #6
0
// Extracts `blobcount` BLOBNBOXes from `box_it` and assembles their C_BLOBs
// into a new WERD. A box flagged joined_to_prev has its outlines appended to
// the blob most recently added; any other box contributes a new blob. Each
// extracted BLOBNBOX is deleted. The returned WERD is allocated with new.
WERD *make_real_word(BLOBNBOX_IT *box_it,  //iterator
                     int32_t blobcount,      //no of blobs to use
                     bool bol,            //start of line
                     uint8_t blanks          //no of blanks
                    ) {
  C_BLOB_LIST word_blobs;                  // blobs collected for the word
  C_BLOB_IT word_blob_it = &word_blobs;
  C_OUTLINE_IT outline_it;                 // used when merging outlines

  for (int32_t taken = 0; taken < blobcount; ++taken) {
    BLOBNBOX *current = box_it->extract();
    C_BLOB *piece = current->cblob();

    if (piece != nullptr) {
      if (current->joined_to_prev()) {
        // Move the outlines onto the end of the last blob added.
        outline_it.set_to_list(word_blob_it.data()->out_list());
        outline_it.move_to_last();
        outline_it.add_list_after(piece->out_list());
        delete piece;
      } else {
        word_blob_it.add_after_then_move(piece);
      }
    }

    delete current;
    box_it->forward();          // step to the next box
  }

  // A word is always given at least one blank.
  if (blanks == 0)
    blanks = 1;

  WERD *word = new WERD(&word_blobs, blanks, nullptr);

  if (bol)
    word->set_flag(W_BOL, true);
  // The iterator being back at the first element marks the end of the line.
  if (box_it->at_first())
    word->set_flag(W_EOL, true);

  return word;
}
Example #7
0
// Estimates the space and non-space gap sizes for one row from the gaps
// between its blobs, using block-level values as guidance and fallbacks.
//
// Steps:
//   1. Histogram the gaps between consecutive blobs that are not joined to
//      their predecessor, counting a gap only when both neighbouring blobs
//      are at least block->pr_space wide and the gap is below maxwidth.
//   2. If too few gaps qualified (fewer than total_count *
//      textord_words_minlarge), rebuild the histogram without the width test.
//   3. Smooth and cluster the histogram, take the median of each cluster and
//      pick `lower` (first median not above block->max_nonspace) and `upper`
//      (first median above it), falling back to block->pr_nonsp and
//      block->pr_space when no such cluster exists.
//
// Writes row->min_space, row->max_nonspace, row->space_threshold,
// row->space_size and row->kern_size.
// Returns 0 (with row->min_space and row->max_nonspace set to 0) when there
// are no gaps or no clusters, and 1 otherwise.
// `rotation` is not referenced in this function.
int32_t row_words2(                  //compute space size
        TO_BLOCK* block,  //block it came from
        TO_ROW* row,      //row to operate on
        int32_t maxwidth,   //max expected space size
        FCOORD rotation,  //for drawing
        bool testing_on  //for debug
) {
  bool prev_valid;              //if decent size
  bool this_valid;              //current blob big enough
  int32_t prev_x;                  //end of prev blob
  int32_t min_width;               //min interesting width
  int32_t valid_count;             //good gaps
  int32_t total_count;             //total gaps
  int32_t cluster_count;           //no of clusters
  int32_t prev_count;              //previous cluster_count
  int32_t gap_index;               //which cluster
  int32_t smooth_factor;           //for smoothing stats
  BLOBNBOX *blob;                //current blob
  float lower, upper;            //clustering parameters
  ICOORD testpt;
  TBOX blob_box;                  //bounding box
                                 //iterator
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS gap_stats (0, maxwidth);
                                 //gap sizes
  float gaps[BLOCK_STATS_CLUSTERS];
  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
  //clusters

  // testpt is assigned here but never read again in this function.
  testpt = ICOORD (textord_test_x, textord_test_y);
  smooth_factor =
    static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
  //      if (testing_on)
  //              tprintf("Row smooth factor=%d\n",smooth_factor);
  prev_valid = false;
  prev_x = -INT16_MAX;
  // Constant false, so the detailed stats dump near the end never runs.
  const bool testing_row = false;
                                 //min blob size
  min_width = static_cast<int32_t>(block->pr_space);
  total_count = 0;
  // Pass 1: only gaps between two sufficiently wide blobs are recorded.
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    if (!blob->joined_to_prev ()) {
      blob_box = blob->bounding_box ();
      this_valid = blob_box.width () >= min_width;
      if (this_valid && prev_valid
      && blob_box.left () - prev_x < maxwidth) {
        gap_stats.add (blob_box.left () - prev_x, 1);
      }
      total_count++;             //count possibles
      prev_x = blob_box.right ();
      prev_valid = this_valid;
    }
  }
  valid_count = gap_stats.get_total ();
  // Pass 2 (fallback): too few gaps qualified, so redo the histogram using
  // every non-joined blob regardless of its width.
  if (valid_count < total_count * textord_words_minlarge) {
    gap_stats.clear ();
    prev_x = -INT16_MAX;
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!blob->joined_to_prev ()) {
        blob_box = blob->bounding_box ();
        if (blob_box.left () - prev_x < maxwidth) {
          gap_stats.add (blob_box.left () - prev_x, 1);
        }
        prev_x = blob_box.right ();
      }
    }
  }
  if (gap_stats.get_total () == 0) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }

  cluster_count = 0;
  lower = block->xheight * words_initial_lower;
  upper = block->xheight * words_initial_upper;
  gap_stats.smooth (smooth_factor);
  // Re-run clustering until the number of clusters stops growing or the
  // BLOCK_STATS_CLUSTERS limit is reached.
  do {
    prev_count = cluster_count;
    cluster_count = gap_stats.cluster (lower, upper,
      textord_spacesize_ratioprop,
      BLOCK_STATS_CLUSTERS, cluster_stats);
  }
  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
  if (cluster_count < 1) {
    row->min_space = 0;
    row->max_nonspace = 0;
    return 0;
  }
  // Cluster k is read from cluster_stats[k + 1]; element 0 is not used here.
  for (gap_index = 0; gap_index < cluster_count; gap_index++)
    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
  //get medians
  if (testing_on) {
    tprintf ("cluster_count=%d:", cluster_count);
    for (gap_index = 0; gap_index < cluster_count; gap_index++)
      tprintf (" %g(%d)", gaps[gap_index],
        cluster_stats[gap_index + 1].get_total ());
    tprintf ("\n");
  }

  //Try to find proportional non-space and space for row.
  // Empty-bodied loop: stops at the first median <= block->max_nonspace.
  for (gap_index = 0; gap_index < cluster_count
    && gaps[gap_index] > block->max_nonspace; gap_index++);
  if (gap_index < cluster_count)
    lower = gaps[gap_index];     //most frequent below
  else {
    if (testing_on)
      tprintf ("No cluster below block threshold!, using default=%g\n",
        block->pr_nonsp);
    lower = block->pr_nonsp;
  }
  // Empty-bodied loop: stops at the first median > block->max_nonspace.
  for (gap_index = 0; gap_index < cluster_count
    && gaps[gap_index] <= block->max_nonspace; gap_index++);
  if (gap_index < cluster_count)
    upper = gaps[gap_index];     //most frequent above
  else {
    if (testing_on)
      tprintf ("No cluster above block threshold!, using default=%g\n",
        block->pr_space);
    upper = block->pr_space;
  }
  // min_space / max_nonspace are pulled in from upper / lower by a fraction
  // (textord_words_definite_spread) of the distance between them; the
  // threshold is the midpoint of the two.
  row->min_space =
    static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
  row->max_nonspace =
    static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;
  if (testing_on) {
    if (testing_row) {
      tprintf ("GAP STATS\n");
      gap_stats.print();
      tprintf ("SPACE stats\n");
      cluster_stats[2].print_summary();
      tprintf ("NONSPACE stats\n");
      cluster_stats[1].print_summary();
    }
    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
      row->intercept (), row->min_space, upper,
      row->max_nonspace, lower);
  }
  return 1;
}
Example #8
0
// Estimates the space and non-space gap sizes for one row by clustering the
// gaps between its blobs into at most three clusters.
//
// Gaps are measured between consecutive blobs that are not joined to their
// predecessor and are recorded only when smaller than maxwidth. The
// histogram is smoothed and clustered; `lower` (non-space / kern size) and
// `upper` (space size) are then chosen from the cluster medians.
//
// Writes row->min_space, row->max_nonspace, row->space_threshold,
// row->space_size and row->kern_size on success.
// Returns 0 (with row->min_space and row->max_nonspace set to 0) when there
// is not enough evidence; otherwise returns cluster_stats[2].get_total().
// `rotation` is not referenced in this function.
int32_t row_words(                  //compute space size
        TO_BLOCK* block,  //block it came from
        TO_ROW* row,      //row to operate on
        int32_t maxwidth,   //max expected space size
        FCOORD rotation,  //for drawing
        bool testing_on  //for debug
) {
  bool testing_row;             //contains testpt
  bool prev_valid;              //if decent size
  int32_t prev_x;                //end of prev blob
  int32_t cluster_count;         //no of clusters
  int32_t gap_index;             //which cluster
  int32_t smooth_factor;         //for smoothing stats
  BLOBNBOX *blob;                //current blob
  float lower, upper;            //clustering parameters
  float gaps[3];                 //gap clusters
  ICOORD testpt;
  TBOX blob_box;                  //bounding box
                                 //iterator
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS gap_stats (0, maxwidth);
  STATS cluster_stats[4];        //clusters

  testpt = ICOORD (textord_test_x, textord_test_y);
  smooth_factor =
    static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
  //      if (testing_on)
  //              tprintf("Row smooth factor=%d\n",smooth_factor);
  prev_valid = false;
  prev_x = -INT32_MAX;
  testing_row = false;
  // First pass: detect whether any blob contains the debug test point. The
  // widths added to gap_stats here are discarded by the clear() that follows.
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    blob_box = blob->bounding_box ();
    if (blob_box.contains (testpt))
      testing_row = true;
    gap_stats.add (blob_box.width (), 1);
  }
  gap_stats.clear ();
  // Second pass: histogram the gaps. prev_valid is tested first, so the
  // subtraction is never evaluated with the initial -INT32_MAX value of prev_x.
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    if (!blob->joined_to_prev ()) {
      blob_box = blob->bounding_box ();
      if (prev_valid && blob_box.left () - prev_x < maxwidth) {
        gap_stats.add (blob_box.left () - prev_x, 1);
      }
      prev_valid = true;
      prev_x = blob_box.right ();
    }
  }
  if (gap_stats.get_total () == 0) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }
  gap_stats.smooth (smooth_factor);
  lower = row->xheight * textord_words_initial_lower;
  upper = row->xheight * textord_words_initial_upper;
  cluster_count = gap_stats.cluster (lower, upper,
    textord_spacesize_ratioprop, 3,
    cluster_stats);
  // While fewer than two clusters are found, narrow [lower, upper] and retry
  // until the interval no longer contains an integer.
  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
                                 //shrink gap
    // Order matters: the new lower is computed from the already-updated upper.
    upper = (upper * 3 + lower) / 4;
    lower = (lower * 3 + upper) / 4;
    cluster_count = gap_stats.cluster (lower, upper,
      textord_spacesize_ratioprop, 3,
      cluster_stats);
  }
  if (cluster_count < 2) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }
  // Cluster k is read from cluster_stats[k + 1]; element 0 is not used here.
  for (gap_index = 0; gap_index < cluster_count; gap_index++)
    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
  //get medians
  if (cluster_count > 2) {
    // Three clusters: start with gaps[0] as the non-space size and look for a
    // larger cluster median to use as the space size.
    if (testing_on && textord_show_initial_words) {
      tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
        row->intercept (),
        cluster_stats[1].ile (0.5),
        cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
    }
    lower = gaps[0];
    if (gaps[1] > lower) {
      upper = gaps[1];           //prefer most frequent
      // gaps[1] is below the minimum space size: use gaps[2] if it is larger.
      if (upper < block->xheight * textord_words_min_minspace
      && gaps[2] > gaps[1]) {
        upper = gaps[2];
      }
    }
    else if (gaps[2] > lower
      && gaps[2] >= block->xheight * textord_words_min_minspace)
      upper = gaps[2];
    else if (lower >= block->xheight * textord_words_min_minspace) {
      // gaps[0] is itself big enough to be a space: use it as upper and take
      // gaps[1] as the non-space size.
      upper = lower;             //not nice
      lower = gaps[1];
      if (testing_on && textord_show_initial_words) {
        tprintf ("Had to switch most common from lower to upper!!\n");
        gap_stats.print();
      }
    }
    else {
      row->min_space = 0;        //no evidence
      row->max_nonspace = 0;
      return 0;
    }
  }
  else {
    // Two clusters: the smaller median is the non-space size, the larger the
    // space size.
    if (gaps[1] < gaps[0]) {
      if (testing_on && textord_show_initial_words) {
        tprintf ("Had to switch most common from lower to upper!!\n");
        gap_stats.print();
      }
      lower = gaps[1];
      upper = gaps[0];
    }
    else {
      upper = gaps[1];
      lower = gaps[0];
    }
  }
  if (upper < block->xheight * textord_words_min_minspace) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }
  // Diagnostic only: reports when the block-level point
  // (2 * min_space + max_nonspace) / 3 lies outside [lower, upper]. The row
  // values are used regardless.
  if (upper * 3 < block->min_space * 2 + block->max_nonspace
  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
    if (testing_on && textord_show_initial_words) {
      tprintf ("Disagreement between block and row at %g!!\n",
        row->intercept ());
      tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
      gap_stats.print();
    }
  }
  // min_space / max_nonspace are pulled in from upper / lower by a fraction
  // (textord_words_definite_spread) of the distance between them; the
  // threshold is the midpoint of the two.
  row->min_space =
    static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
  row->max_nonspace =
    static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;
  if (testing_on && textord_show_initial_words) {
    if (testing_row) {
      tprintf ("GAP STATS\n");
      gap_stats.print();
      tprintf ("SPACE stats\n");
      cluster_stats[2].print_summary();
      tprintf ("NONSPACE stats\n");
      cluster_stats[1].print_summary();
    }
    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
      row->intercept (), row->min_space, upper,
      row->max_nonspace, lower);
  }
  return cluster_stats[2].get_total ();
}
Example #9
0
// Draws the blobs of a row in `win`, changing the blob colour at each gap
// wider than row->max_nonspace and filling gaps that are wider than
// row->max_nonspace but narrower than row->min_space with a rectangle.
// When pitch > 0 and textord_show_fixed_cuts is set, the fixed-pitch cells
// of each completed word are plotted as well.
void plot_word_decisions(              //draw words
        ScrollView *win,   //window to draw in
        inT16 pitch,  //of block
        TO_ROW *row   //row to draw
) {
    ScrollView::Color colour = ScrollView::MAGENTA;     // blob colour
    ScrollView::Color rect_colour = ScrollView::BLACK;  // gap fill colour
    inT32 prev_x = -MAX_INT16;                          // end of prev blob
    inT16 blob_count = 0;                               // blobs in word
    BLOBNBOX_IT blob_it = row->blob_list();
    BLOBNBOX_IT start_it = blob_it;                     // word start

    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX *blob = blob_it.data();
        TBOX blob_box = blob->bounding_box();
        const bool starts_blob = !blob->joined_to_prev();
        // Horizontal distance from the end of the previous real blob.
        const inT32 gap = blob_box.left() - prev_x;

        if (starts_blob && gap > row->max_nonspace) {
            const bool word_break =
                gap >= row->min_space || gap > row->space_threshold;
            if (word_break && blob_count > 0) {
                // Close the current word and start the next at this blob.
                if (pitch > 0 && textord_show_fixed_cuts) {
                    plot_fp_cells(win, colour, &start_it, pitch, blob_count,
                                  &row->projection, row->projection_left,
                                  row->projection_right,
                                  row->xheight * textord_projection_scale);
                }
                blob_count = 0;
                start_it = blob_it;
            }

            // Step to the next colour, wrapping from MAGENTA back to RED.
            colour = (colour == ScrollView::MAGENTA)
                         ? ScrollView::RED
                         : static_cast<ScrollView::Color>(colour + 1);

            if (gap < row->min_space) {
                // Fuzzy gap: GOLDENROD above the threshold, CORAL otherwise.
                rect_colour = (gap > row->space_threshold)
                                  ? ScrollView::GOLDENROD
                                  : ScrollView::CORAL;
                //fill_color_index(win, rect_colour);
                win->Brush(rect_colour);
                win->Rectangle(prev_x, blob_box.bottom(),
                               blob_box.left(), blob_box.top());
            }
        }

        if (starts_blob)
            prev_x = blob_box.right();

        if (blob->cblob() != NULL) {
            blob->cblob()->plot(win, colour, colour);
            // Only blobs that start a new blob count towards the word.
            if (starts_blob)
                blob_count++;
        }
    }

    // Plot the cells of the final word, if any.
    if (pitch > 0 && textord_show_fixed_cuts && blob_count > 0) {
        plot_fp_cells(win, colour, &start_it, pitch, blob_count,
                      &row->projection, row->projection_left,
                      row->projection_right,
                      row->xheight * textord_projection_scale);
    }
}