Beispiel #1
0
void find_cblob_limits(                  //get y limits
                       C_BLOB *blob,     //blob to search
                       float leftx,      //x limits
                       float rightx,
                       FCOORD rotation,  //for landscape
                       float &ymin,      //output y limits
                       float &ymax) {
  inT16 stepindex;               //current point
  ICOORD pos;                    //current coords
  ICOORD vec;                    //rotated step
  C_OUTLINE *outline;            //current outline
                                 //outlines
  C_OUTLINE_IT out_it = blob->out_list ();

  ymin = (float) MAX_INT32;
  ymax = (float) -MAX_INT32;
  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
    outline = out_it.data ();
    pos = outline->start_pos (); //get coords
    pos.rotate (rotation);
    for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
                                 //inside
      if (pos.x () >= leftx && pos.x () <= rightx) {
        UpdateRange(pos.y(), &ymin, &ymax);
      }
      vec = outline->step (stepindex);
      vec.rotate (rotation);
      pos += vec;                //move to next
    }
  }
}
Beispiel #2
0
void find_cblob_hlimits(                //get x limits
                        C_BLOB *blob,   //blob to search
                        float bottomy,  //y limits
                        float topy,
                        float &xmin,    //output x limits
                        float &xmax) {
  int16_t stepindex;               //current point
  ICOORD pos;                    //current coords
  ICOORD vec;                    //rotated step
  C_OUTLINE *outline;            //current outline
                                 //outlines
  C_OUTLINE_IT out_it = blob->out_list ();

  xmin = (float) INT32_MAX;
  xmax = (float) -INT32_MAX;
  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
    outline = out_it.data ();
    pos = outline->start_pos (); //get coords
    for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
                                 //inside
      if (pos.y () >= bottomy && pos.y () <= topy) {
        UpdateRange(pos.x(), &xmin, &xmax);
      }
      vec = outline->step (stepindex);
      pos += vec;                //move to next
    }
  }
}
Beispiel #3
0
// Remove outlines that are a tiny fraction in either width or height
// of the word height.
void Textord::clean_small_noise_from_words(ROW *row) {
  WERD_IT word_it(row->word_list());
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    WERD* word = word_it.data();
    int min_size = static_cast<int>(
      textord_noise_hfract * word->bounding_box().height() + 0.5);
    C_BLOB_IT blob_it(word->cblob_list());
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      C_BLOB* blob = blob_it.data();
      C_OUTLINE_IT out_it(blob->out_list());
      for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
        C_OUTLINE* outline = out_it.data();
        outline->RemoveSmallRecursive(min_size, &out_it);
      }
      if (blob->out_list()->empty()) {
        delete blob_it.extract();
      }
    }
    if (word->cblob_list()->empty()) {
      if (!word_it.at_last()) {
        // The next word is no longer a fuzzy non space if it was before,
        // since the word before is about to be deleted.
        WERD* next_word = word_it.data_relative(1);
        if (next_word->flag(W_FUZZY_NON)) {
          next_word->set_flag(W_FUZZY_NON, false);
        }
      }
      delete word_it.extract();
    }
  }
}
Beispiel #4
0
BOOL8
C_OUTLINE::operator< (           //winding number
const C_OUTLINE & other          //other outline
) const
{
  inT16 count = 0;               //winding count
  ICOORD pos;                    //position of point
  inT32 stepindex;               //index to cstep

  if (!box.overlap (other.box))
    return FALSE;                //can't be contained
  if (stepcount == 0)
    return other.box.contains(this->box);

  pos = start;
  for (stepindex = 0; stepindex < stepcount
    && (count = other.winding_number (pos)) == INTERSECTING; stepindex++)
    pos += step (stepindex);     //try all points
  if (count == INTERSECTING) {
                                 //all intersected
    pos = other.start;
    for (stepindex = 0; stepindex < other.stepcount
      && (count = winding_number (pos)) == INTERSECTING; stepindex++)
                                 //try other way round
      pos += other.step (stepindex);
    return count == INTERSECTING || count == 0;
  }
  return count != 0;
}
Beispiel #5
0
static void render_outline_list(C_OUTLINE_LIST *list,
                                int left, int top, Pix* pix) {
  C_OUTLINE_IT it(list);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    C_OUTLINE* outline = it.data();
    outline->render(left, top, pix);
    if (!outline->child()->empty())
      render_outline_list(outline->child(), left, top, pix);
  }
}
TBOX C_BLOB::bounding_box() {  //bounding box
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = &outlines;   //outlines of blob
  TBOX box;                       //bounding box

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    box += outline->bounding_box ();
  }
  return box;
}
inT32 C_BLOB::perimeter() {
  C_OUTLINE *outline;            // current outline
  C_OUTLINE_IT it = &outlines;   // outlines of blob
  inT32 total;                   // total perimeter

  total = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    outline = it.data();
    total += outline->perimeter();
  }
  return total;
}
inT32 C_BLOB::outer_area() {  //area
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = &outlines;   //outlines of blob
  inT32 total;                   //total area

  total = 0;
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    total += outline->outer_area ();
  }
  return total;
}
Beispiel #9
0
// Adds the selected outlines to the indcated real blobs, and puts the rest
// back in rej_cblobs where they came from. Where the target_blobs entry is
// nullptr, a run of wanted outlines is put into a single new blob.
// Ownership of the outlines is transferred back to the word. (Hence
// GenericVector and not PointerVector.)
// Returns true if any new blob was added to the start of the word, which
// suggests that it might need joining to the word before it, and likewise
// sets make_next_word_fuzzy true if any new blob was added to the end.
bool WERD::AddSelectedOutlines(const GenericVector<bool>& wanted,
                               const GenericVector<C_BLOB*>& target_blobs,
                               const GenericVector<C_OUTLINE*>& outlines,
                               bool* make_next_word_fuzzy) {
  bool outline_added_to_start = false;
  if (make_next_word_fuzzy != nullptr) *make_next_word_fuzzy = false;
  C_BLOB_IT rej_it(&rej_cblobs);
  for (int i = 0; i < outlines.size(); ++i) {
    C_OUTLINE* outline = outlines[i];
    if (outline == nullptr) continue;  // Already used it.
    if (wanted[i]) {
      C_BLOB* target_blob = target_blobs[i];
      TBOX noise_box = outline->bounding_box();
      if (target_blob == nullptr) {
        target_blob = new C_BLOB(outline);
        // Need to find the insertion point.
        C_BLOB_IT blob_it(&cblobs);
        for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
             blob_it.forward()) {
          C_BLOB* blob = blob_it.data();
          TBOX blob_box = blob->bounding_box();
          if (blob_box.left() > noise_box.left()) {
            if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) {
              // We might want to join this word to its predecessor.
              outline_added_to_start = true;
            }
            blob_it.add_before_stay_put(target_blob);
            break;
          }
        }
        if (blob_it.cycled_list()) {
          blob_it.add_to_end(target_blob);
          if (make_next_word_fuzzy != nullptr) *make_next_word_fuzzy = true;
        }
        // Add all consecutive wanted, but null-blob outlines to same blob.
        C_OUTLINE_IT ol_it(target_blob->out_list());
        while (i + 1 < outlines.size() && wanted[i + 1] &&
               target_blobs[i + 1] == nullptr) {
          ++i;
          ol_it.add_to_end(outlines[i]);
        }
      } else {
        // Insert outline into this blob.
        C_OUTLINE_IT ol_it(target_blob->out_list());
        ol_it.add_to_end(outline);
      }
    } else {
      // Put back on noise list.
      rej_it.add_to_end(new C_BLOB(outline));
    }
  }
  return outline_added_to_start;
}
Beispiel #10
0
void close_chopped_cfragments(                             //chop the outline
                              C_OUTLINE_FRAG_LIST *frags,  //list to clear
                              C_OUTLINE_LIST *children,    //potential children
                              float pitch_error,           //allowed shrinkage
                              C_OUTLINE_IT *dest_it        //output list
                             ) {
                                 //iterator
  C_OUTLINE_FRAG_IT frag_it = frags;
  C_OUTLINE_FRAG *bottom_frag;   //bottom of cut
  C_OUTLINE_FRAG *top_frag;      //top of cut
  C_OUTLINE *outline;            //new outline
  C_OUTLINE *child;              //current child
  C_OUTLINE_IT child_it = children;
  C_OUTLINE_IT olchild_it;       //children of outline

  while (!frag_it.empty()) {
    frag_it.move_to_first();
                                 // get bottom one
    bottom_frag = frag_it.extract();
    frag_it.forward();
    top_frag = frag_it.data();  // look at next
    if ((bottom_frag->steps == nullptr && top_frag->steps == nullptr)
    || (bottom_frag->steps != nullptr && top_frag->steps != nullptr)) {
      if (frag_it.data_relative(1)->ycoord == top_frag->ycoord)
        frag_it.forward();
    }
    top_frag = frag_it.extract();
    if (top_frag->other_end != bottom_frag) {
      outline = join_chopped_fragments(bottom_frag, top_frag);
      ASSERT_HOST(outline == nullptr);
    } else {
      outline = join_chopped_fragments(bottom_frag, top_frag);
      if (outline != nullptr) {
        olchild_it.set_to_list(outline->child());
        for (child_it.mark_cycle_pt(); !child_it.cycled_list();
             child_it.forward()) {
          child = child_it.data();
          if (*child < *outline)
            olchild_it.add_to_end(child_it.extract());
        }
        if (outline->bounding_box().width() > pitch_error)
          dest_it->add_after_then_move(outline);
        else
          delete outline;          // Make it disappear.
      }
    }
  }
  while (!child_it.empty ()) {
    dest_it->add_after_then_move (child_it.extract ());
    child_it.forward ();
  }
}
static void reverse_outline_list(                      //reverse outlines
                                 C_OUTLINE_LIST *list  //outline to reverse
                                ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = list;        //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    outline->reverse ();         //reverse it
    if (!outline->child ()->empty ())
      reverse_outline_list (outline->child ());
  }
}
Beispiel #12
0
// If this outline is smaller than the given min_size, delete this and
// remove from its list, via *it, after checking that *it points to this.
// Otherwise, if any children of this are too small, delete them.
// On entry, *it must be an iterator pointing to this. If this gets deleted
// then this is extracted from *it, so an iteration can continue.
void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
  if (box.width() < min_size || box.height() < min_size) {
    ASSERT_HOST(this == it->data());
    delete it->extract();  // Too small so get rid of it and any children.
  } else if (!children.empty()) {
    // Search the children of this, deleting any that are too small.
    C_OUTLINE_IT child_it(&children);
    for (child_it.mark_cycle_pt(); !child_it.cycled_list();
         child_it.forward()) {
      C_OUTLINE* child = child_it.data();
      child->RemoveSmallRecursive(min_size, &child_it);
    }
  }
}
inT32 C_BLOB::count_transitions(                 //area
                                inT32 threshold  //on size
                               ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = &outlines;   //outlines of blob
  inT32 total;                   //total area

  total = 0;
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    total += outline->count_transitions (threshold);
  }
  return total;
}
Beispiel #14
0
void fill_buckets(                           // find blobs
                  C_OUTLINE_LIST *outlines,  // outlines in block
                  OL_BUCKETS *buckets        // output buckets
                 ) {
  TBOX ol_box;                     // outline box
  C_OUTLINE_IT out_it = outlines;  // iterator
  C_OUTLINE_IT bucket_it;          // iterator in bucket
  C_OUTLINE *outline;              // current outline

  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
    outline = out_it.extract();  // take off list
                                 // get box
    ol_box = outline->bounding_box();
    bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom()));
    bucket_it.add_to_end(outline);
  }
}
static void plot_outline_list(                       //draw outlines
                              C_OUTLINE_LIST *list,  //outline to draw
                              ScrollView* window,         //window to draw in
                              ScrollView::Color colour,         //colour to use
                              ScrollView::Color child_colour    //colour of children
                             ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = list;        //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
                                 //draw it
    outline->plot (window, colour);
    if (!outline->child ()->empty ())
      plot_outline_list (outline->child (), window,
        child_colour, child_colour);
  }
}
Beispiel #16
0
// Removes noise from the word by moving small outlines to the rej_cblobs
// list, based on the size_threshold.
void WERD::CleanNoise(float size_threshold) {
  C_BLOB_IT blob_it(&cblobs);
  C_BLOB_IT rej_it(&rej_cblobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    C_BLOB* blob = blob_it.data();
    C_OUTLINE_IT ol_it(blob->out_list());
    for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
      C_OUTLINE* outline = ol_it.data();
      TBOX ol_box = outline->bounding_box();
      int ol_size =
          ol_box.width() > ol_box.height() ? ol_box.width() : ol_box.height();
      if (ol_size < size_threshold) {
        // This outline is too small. Move it to a separate blob in the
        // reject blobs list.
        C_BLOB* rej_blob = new C_BLOB(ol_it.extract());
        rej_it.add_after_then_move(rej_blob);
      }
    }
    if (blob->out_list()->empty()) delete blob_it.extract();
  }
}
C_BLOB::C_BLOB(                              //constructor
               C_OUTLINE_LIST *outline_list  //in random order
              ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = outline_list;//iterator

  while (!it.empty ()) {         //grab the list
    outline = it.extract ();     //get off the list
                                 //put it in place
    position_outline(outline, &outlines);
    if (!it.empty ())
      it.forward ();
  }
  it.set_to_list (&outlines);
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    if (outline->turn_direction () < 0) {
      outline->reverse ();
      reverse_outline_list (outline->child ());
      outline->set_flag (COUT_INVERSE, TRUE);
    }
    else {
      outline->set_flag (COUT_INVERSE, FALSE);
    }
  }
}
Beispiel #18
0
bool Textord::clean_noise_from_row(          //remove empties
        ROW* row  //row to clean
) {
  bool testing_on;
  TBOX blob_box;                 //bounding box
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  int32_t blob_size;             //biggest size
  int32_t trans_count = 0;       //no of transitions
  int32_t trans_threshold;       //noise tolerance
  int32_t dot_count;             //small objects
  int32_t norm_count;            //normal objects
  int32_t super_norm_count;      //real char-like
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  testing_on = textord_test_y > row->base_line (textord_test_x)
               && textord_show_blobs
               && textord_test_y < row->base_line (textord_test_x) + row->x_height ();
  dot_count = 0;
  norm_count = 0;
  super_norm_count = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count smal outlines
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            super_norm_count++;  //count smal outlines
        }
      }
      else
        super_norm_count++;
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
          && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
      if (testing_on) {
        tprintf
          ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",
          blob_box.left (), blob_box.bottom (), blob_box.right (),
          blob_box.top (), blob->out_list ()->length (), trans_count,
          blob_box.bottom () - row->base_line (blob_box.left ()));
      }
    }
  }
  if (textord_noise_debug) {
    tprintf ("Row ending at (%d,%g):",
      blob_box.right (), row->base_line (blob_box.right ()));
    tprintf (" R=%g, dc=%d, nc=%d, %s\n",
      norm_count > 0 ? (float) dot_count / norm_count : 9999,
      dot_count, norm_count,
      dot_count > norm_count * textord_noise_normratio
      && dot_count > 2 ? "REJECTED" : "ACCEPTED");
  }
  return super_norm_count < textord_noise_sncount
    && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
}
Beispiel #19
0
void Textord::clean_noise_from_words(          //remove empties
                                     ROW *row  //row to clean
                                    ) {
  TBOX blob_box;                 //bounding box
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  int32_t blob_size;             //biggest size
  int32_t trans_count;           //no of transitions
  int32_t trans_threshold;       //noise tolerance
  int32_t dot_count;             //small objects
  int32_t norm_count;            //normal objects
  int32_t dud_words;             //number discarded
  int32_t ok_words;              //number remaining
  int32_t word_index;            //current word
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  ok_words = word_it.length ();
  if (ok_words == 0 || textord_no_rejects)
    return;
  // was it chucked
  std::vector<int8_t> word_dud(ok_words);
  dud_words = 0;
  ok_words = 0;
  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
    dot_count = 0;
    norm_count = 0;
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count smal outlines
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            norm_count++;        //count smal outlines
        }
      }
      else
        norm_count++;
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
      && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
    }
    if (dot_count > 2 && !word->flag(W_REP_CHAR)) {
      if (dot_count > norm_count * textord_noise_normratio * 2)
        word_dud[word_index] = 2;
      else if (dot_count > norm_count * textord_noise_normratio)
        word_dud[word_index] = 1;
      else
        word_dud[word_index] = 0;
    } else {
      word_dud[word_index] = 0;
    }
    if (word_dud[word_index] == 2)
      dud_words++;
    else
      ok_words++;
    word_index++;
  }

  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    if (word_dud[word_index] == 2
    || (word_dud[word_index] == 1 && dud_words > ok_words)) {
      word = word_it.data();  // Current word.
      // Previously we threw away the entire word.
      // Now just aggressively throw all small blobs into the reject list, where
      // the classifier can decide whether they are actually needed.
      word->CleanNoise(textord_noise_sizelimit * row->x_height());
    }
    word_index++;
  }
}
Beispiel #20
0
void fixed_split_coutline(                        //chop the outline
                          C_OUTLINE *srcline,     //source outline
                          int16_t chop_coord,       //place to chop
                          float pitch_error,      //allowed deviation
                          C_OUTLINE_IT *left_it,  //left half of chop
                          C_OUTLINE_IT *right_it  //right half of chop
                         ) {
  C_OUTLINE *child;              //child outline
  TBOX srcbox;                    //box of outline
  C_OUTLINE_LIST left_ch;        //left children
  C_OUTLINE_LIST right_ch;       //right children
  C_OUTLINE_FRAG_LIST left_frags;//chopped fragments
  C_OUTLINE_FRAG_LIST right_frags;;
  C_OUTLINE_IT left_ch_it = &left_ch;
                                 //for whole children
  C_OUTLINE_IT right_ch_it = &right_ch;
                                 //for holes
  C_OUTLINE_IT child_it = srcline->child ();

  srcbox = srcline->bounding_box();
  if (srcbox.left() + srcbox.right() <= chop_coord * 2
      && srcbox.right() < chop_coord + pitch_error) {
    // Whole outline is in the left side or not far over the chop_coord,
    // so put the whole thing on the left.
    left_it->add_after_then_move(srcline);
  } else if (srcbox.left() + srcbox.right() > chop_coord * 2
             && srcbox.left () > chop_coord - pitch_error) {
    // Whole outline is in the right side or not far over the chop_coord,
    // so put the whole thing on the right.
   right_it->add_before_stay_put(srcline);
  } else {
    // Needs real chopping.
    if (fixed_chop_coutline(srcline, chop_coord, pitch_error,
        &left_frags, &right_frags)) {
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.extract();
        srcbox = child->bounding_box();
        if (srcbox.right() < chop_coord) {
          // Whole child is on the left.
          left_ch_it.add_after_then_move(child);
        } else if (srcbox.left() > chop_coord) {
          // Whole child is on the right.
          right_ch_it.add_after_then_move (child);
        } else {
          // No pitch_error is allowed when chopping children to prevent
          // impossible outlines from being created.
          if (fixed_chop_coutline(child, chop_coord, 0.0f,
              &left_frags, &right_frags)) {
            delete child;
          } else {
            if (srcbox.left() + srcbox.right() <= chop_coord * 2)
              left_ch_it.add_after_then_move(child);
            else
              right_ch_it.add_after_then_move(child);
          }
        }
      }
      close_chopped_cfragments(&left_frags, &left_ch, pitch_error, left_it);
      close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it);
      ASSERT_HOST(left_ch.empty() && right_ch.empty());
      // No children left.
      delete srcline;            // Smashed up.
    } else {
      // Chop failed. Just use middle coord.
      if (srcbox.left() + srcbox.right() <= chop_coord * 2)
        left_it->add_after_then_move(srcline);  // Stick whole in left.
      else
        right_it->add_before_stay_put(srcline);
    }
  }
}
Beispiel #21
0
inT32 OL_BUCKETS::count_children(                     // recursive count
                                 C_OUTLINE *outline,  // parent outline
                                 inT32 max_count      // max output
                                ) {
  BOOL8 parent_box;              // could it be boxy
  inT16 xmin, xmax;              // coord limits
  inT16 ymin, ymax;
  inT16 xindex, yindex;          // current bucket
  C_OUTLINE *child;              // current child
  inT32 child_count;             // no of children
  inT32 grandchild_count;        // no of grandchildren
  inT32 parent_area;             // potential box
  FLOAT32 max_parent_area;       // potential box
  inT32 child_area;              // current child
  inT32 child_length;            // current child
  TBOX olbox;
  C_OUTLINE_IT child_it;         // search iterator

  olbox = outline->bounding_box();
  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
  child_count = 0;
  grandchild_count = 0;
  parent_area = 0;
  max_parent_area = 0;
  parent_box = TRUE;
  for (yindex = ymin; yindex <= ymax; yindex++) {
    for (xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
      if (child_it.empty())
        continue;
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.data();
        if (child != outline && *child < *outline) {
          child_count++;
          if (child_count <= max_count) {
            int max_grand =(max_count - child_count) /
                            edges_children_per_grandchild;
            if (max_grand > 0)
              grandchild_count += count_children(child, max_grand) *
                                  edges_children_per_grandchild;
            else
              grandchild_count += count_children(child, 1);
          }
          if (child_count + grandchild_count > max_count) {
            if (edges_debug)
              tprintf("Discarding parent with child count=%d, gc=%d\n",
                      child_count,grandchild_count);
            return child_count + grandchild_count;
          }
          if (parent_area == 0) {
            parent_area = outline->outer_area();
            if (parent_area < 0)
              parent_area = -parent_area;
            max_parent_area = outline->bounding_box().area() * edges_boxarea;
            if (parent_area < max_parent_area)
              parent_box = FALSE;
          }
          if (parent_box &&
              (!edges_children_fix ||
               child->bounding_box().height() > edges_min_nonhole)) {
            child_area = child->outer_area();
            if (child_area < 0)
              child_area = -child_area;
            if (edges_children_fix) {
              if (parent_area - child_area < max_parent_area) {
                parent_box = FALSE;
                continue;
              }
              if (grandchild_count > 0) {
                if (edges_debug)
                  tprintf("Discarding parent of area %d, child area=%d, max%g "
                          "with gc=%d\n",
                          parent_area, child_area, max_parent_area,
                          grandchild_count);
                return max_count + 1;
              }
              child_length = child->pathlength();
              if (child_length * child_length >
                  child_area * edges_patharea_ratio) {
                if (edges_debug)
                  tprintf("Discarding parent of area %d, child area=%d, max%g "
                          "with child length=%d\n",
                          parent_area, child_area, max_parent_area,
                          child_length);
                return max_count + 1;
              }
            }
            if (child_area < child->bounding_box().area() * edges_childarea) {
              if (edges_debug)
                tprintf("Discarding parent of area %d, child area=%d, max%g "
                        "with child rect=%d\n",
                        parent_area, child_area, max_parent_area,
                        child->bounding_box().area());
              return max_count + 1;
            }
          }
        }
      }
    }
  }
  return child_count + grandchild_count;
}
Beispiel #22
0
void Textord::clean_noise_from_words(          //remove empties
                                     ROW *row  //row to clean
                                    ) {
  TBOX blob_box;                  //bounding box
  inT8 *word_dud;                //was it chucked
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  inT32 blob_size;               //biggest size
  inT32 trans_count;             //no of transitions
  inT32 trans_threshold;         //noise tolerance
  inT32 dot_count;               //small objects
  inT32 norm_count;              //normal objects
  inT32 dud_words;               //number discarded
  inT32 ok_words;                //number remaining
  inT32 word_index;              //current word
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  ok_words = word_it.length ();
  if (ok_words == 0 || textord_no_rejects)
    return;
  word_dud = (inT8 *) alloc_mem (ok_words * sizeof (inT8));
  dud_words = 0;
  ok_words = 0;
  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
    dot_count = 0;
    norm_count = 0;
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count smal outlines
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            norm_count++;        //count smal outlines
        }
      }
      else
        norm_count++;
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
      && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
    }
    if (dot_count > 2) {
      if (dot_count > norm_count * textord_noise_normratio * 2)
        word_dud[word_index] = 2;
      else if (dot_count > norm_count * textord_noise_normratio)
        word_dud[word_index] = 1;
      else
        word_dud[word_index] = 0;
    }
    else
      word_dud[word_index] = 0;
    if (word_dud[word_index] == 2)
      dud_words++;
    else
      ok_words++;
    word_index++;
  }

  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    if (word_dud[word_index] == 2
    || (word_dud[word_index] == 1 && dud_words > ok_words)) {
      word = word_it.data ();    //current word
                                 //rejected blobs
      blob_it.set_to_list (word->rej_cblob_list ());
                                 //move from blobs
      blob_it.add_list_after (word->cblob_list ());
    }
    word_index++;
  }
  free_mem(word_dud);
}