示例#1
0
static void render_outline_list(C_OUTLINE_LIST *list,
                                int left, int top, Pix* pix) {
  C_OUTLINE_IT it(list);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    C_OUTLINE* outline = it.data();
    outline->render(left, top, pix);
    if (!outline->child()->empty())
      render_outline_list(outline->child(), left, top, pix);
  }
}
static void reverse_outline_list(                      //reverse outlines
                                 C_OUTLINE_LIST *list  //outline to reverse
                                ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = list;        //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    outline->reverse ();         //reverse it
    if (!outline->child ()->empty ())
      reverse_outline_list (outline->child ());
  }
}
C_BLOB::C_BLOB(                              //constructor
               C_OUTLINE_LIST *outline_list  //in random order
              ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = outline_list;//iterator

  while (!it.empty ()) {         //grab the list
    outline = it.extract ();     //get off the list
                                 //put it in place
    position_outline(outline, &outlines);
    if (!it.empty ())
      it.forward ();
  }
  it.set_to_list (&outlines);
  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
    if (outline->turn_direction () < 0) {
      outline->reverse ();
      reverse_outline_list (outline->child ());
      outline->set_flag (COUT_INVERSE, TRUE);
    }
    else {
      outline->set_flag (COUT_INVERSE, FALSE);
    }
  }
}
static void plot_outline_list(                       //draw outlines
                              C_OUTLINE_LIST *list,  //outline to draw
                              ScrollView* window,         //window to draw in
                              ScrollView::Color colour,         //colour to use
                              ScrollView::Color child_colour    //colour of children
                             ) {
  C_OUTLINE *outline;            //current outline
  C_OUTLINE_IT it = list;        //iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    outline = it.data ();
                                 //draw it
    outline->plot (window, colour);
    if (!outline->child ()->empty ())
      plot_outline_list (outline->child (), window,
        child_colour, child_colour);
  }
}
示例#5
0
void close_chopped_cfragments(                             //chop the outline
                              C_OUTLINE_FRAG_LIST *frags,  //list to clear
                              C_OUTLINE_LIST *children,    //potential children
                              float pitch_error,           //allowed shrinkage
                              C_OUTLINE_IT *dest_it        //output list
                             ) {
                                 //iterator
  C_OUTLINE_FRAG_IT frag_it = frags;
  C_OUTLINE_FRAG *bottom_frag;   //bottom of cut
  C_OUTLINE_FRAG *top_frag;      //top of cut
  C_OUTLINE *outline;            //new outline
  C_OUTLINE *child;              //current child
  C_OUTLINE_IT child_it = children;
  C_OUTLINE_IT olchild_it;       //children of outline

  while (!frag_it.empty()) {
    frag_it.move_to_first();
                                 // get bottom one
    bottom_frag = frag_it.extract();
    frag_it.forward();
    top_frag = frag_it.data();  // look at next
    if ((bottom_frag->steps == nullptr && top_frag->steps == nullptr)
    || (bottom_frag->steps != nullptr && top_frag->steps != nullptr)) {
      if (frag_it.data_relative(1)->ycoord == top_frag->ycoord)
        frag_it.forward();
    }
    top_frag = frag_it.extract();
    if (top_frag->other_end != bottom_frag) {
      outline = join_chopped_fragments(bottom_frag, top_frag);
      ASSERT_HOST(outline == nullptr);
    } else {
      outline = join_chopped_fragments(bottom_frag, top_frag);
      if (outline != nullptr) {
        olchild_it.set_to_list(outline->child());
        for (child_it.mark_cycle_pt(); !child_it.cycled_list();
             child_it.forward()) {
          child = child_it.data();
          if (*child < *outline)
            olchild_it.add_to_end(child_it.extract());
        }
        if (outline->bounding_box().width() > pitch_error)
          dest_it->add_after_then_move(outline);
        else
          delete outline;          // Make it disappear.
      }
    }
  }
  while (!child_it.empty ()) {
    dest_it->add_after_then_move (child_it.extract ());
    child_it.forward ();
  }
}
示例#6
0
void Textord::clean_noise_from_words(          //remove empties
                                     ROW *row  //row to clean
                                    ) {
  TBOX blob_box;                 //bounding box
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  int32_t blob_size;             //biggest size
  int32_t trans_count;           //no of transitions
  int32_t trans_threshold;       //noise tolerance
  int32_t dot_count;             //small objects
  int32_t norm_count;            //normal objects
  int32_t dud_words;             //number discarded
  int32_t ok_words;              //number remaining
  int32_t word_index;            //current word
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  ok_words = word_it.length ();
  if (ok_words == 0 || textord_no_rejects)
    return;
  // was it chucked
  std::vector<int8_t> word_dud(ok_words);
  dud_words = 0;
  ok_words = 0;
  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
    dot_count = 0;
    norm_count = 0;
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count smal outlines
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            norm_count++;        //count smal outlines
        }
      }
      else
        norm_count++;
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
      && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
    }
    if (dot_count > 2 && !word->flag(W_REP_CHAR)) {
      if (dot_count > norm_count * textord_noise_normratio * 2)
        word_dud[word_index] = 2;
      else if (dot_count > norm_count * textord_noise_normratio)
        word_dud[word_index] = 1;
      else
        word_dud[word_index] = 0;
    } else {
      word_dud[word_index] = 0;
    }
    if (word_dud[word_index] == 2)
      dud_words++;
    else
      ok_words++;
    word_index++;
  }

  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    if (word_dud[word_index] == 2
    || (word_dud[word_index] == 1 && dud_words > ok_words)) {
      word = word_it.data();  // Current word.
      // Previously we threw away the entire word.
      // Now just aggressively throw all small blobs into the reject list, where
      // the classifier can decide whether they are actually needed.
      word->CleanNoise(textord_noise_sizelimit * row->x_height());
    }
    word_index++;
  }
}
示例#7
0
bool Textord::clean_noise_from_row(          //remove empties
        ROW* row  //row to clean
) {
  bool testing_on;
  TBOX blob_box;                 //bounding box
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  int32_t blob_size;             //biggest size
  int32_t trans_count = 0;       //no of transitions
  int32_t trans_threshold;       //noise tolerance
  int32_t dot_count;             //small objects
  int32_t norm_count;            //normal objects
  int32_t super_norm_count;      //real char-like
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  testing_on = textord_test_y > row->base_line (textord_test_x)
               && textord_show_blobs
               && textord_test_y < row->base_line (textord_test_x) + row->x_height ();
  dot_count = 0;
  norm_count = 0;
  super_norm_count = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count smal outlines
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            super_norm_count++;  //count smal outlines
        }
      }
      else
        super_norm_count++;
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
          && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
      if (testing_on) {
        tprintf
          ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",
          blob_box.left (), blob_box.bottom (), blob_box.right (),
          blob_box.top (), blob->out_list ()->length (), trans_count,
          blob_box.bottom () - row->base_line (blob_box.left ()));
      }
    }
  }
  if (textord_noise_debug) {
    tprintf ("Row ending at (%d,%g):",
      blob_box.right (), row->base_line (blob_box.right ()));
    tprintf (" R=%g, dc=%d, nc=%d, %s\n",
      norm_count > 0 ? (float) dot_count / norm_count : 9999,
      dot_count, norm_count,
      dot_count > norm_count * textord_noise_normratio
      && dot_count > 2 ? "REJECTED" : "ACCEPTED");
  }
  return super_norm_count < textord_noise_sncount
    && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
}
示例#8
0
void Textord::clean_noise_from_words(          //remove empties
                                     ROW *row  //row to clean
                                    ) {
  TBOX blob_box;                  //bounding box
  inT8 *word_dud;                //was it chucked
  C_BLOB *blob;                  //current blob
  C_OUTLINE *outline;            //current outline
  WERD *word;                    //current word
  inT32 blob_size;               //biggest size
  inT32 trans_count;             //no of transitions
  inT32 trans_threshold;         //noise tolerance
  inT32 dot_count;               //small objects
  inT32 norm_count;              //normal objects
  inT32 dud_words;               //number discarded
  inT32 ok_words;                //number remaining
  inT32 word_index;              //current word
                                 //words of row
  WERD_IT word_it = row->word_list ();
  C_BLOB_IT blob_it;             //blob iterator
  C_OUTLINE_IT out_it;           //outline iterator

  ok_words = word_it.length ();
  if (ok_words == 0 || textord_no_rejects)
    return;
  word_dud = (inT8 *) alloc_mem (ok_words * sizeof (inT8));
  dud_words = 0;
  ok_words = 0;
  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();      //current word
    dot_count = 0;
    norm_count = 0;
                                 //blobs in word
    blob_it.set_to_list (word->cblob_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!word->flag (W_DONT_CHOP)) {
                                 //get outlines
        out_it.set_to_list (blob->out_list ());
        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
        out_it.forward ()) {
          outline = out_it.data ();
          blob_box = outline->bounding_box ();
          blob_size =
            blob_box.width () >
            blob_box.height ()? blob_box.width () : blob_box.
            height();
          if (blob_size < textord_noise_sizelimit * row->x_height ())
            dot_count++;         //count smal outlines
          if (!outline->child ()->empty ()
            && blob_box.height () <
            (1 + textord_noise_syfract) * row->x_height ()
            && blob_box.height () >
            (1 - textord_noise_syfract) * row->x_height ()
            && blob_box.width () <
            (1 + textord_noise_sxfract) * row->x_height ()
            && blob_box.width () >
            (1 - textord_noise_sxfract) * row->x_height ())
            norm_count++;        //count smal outlines
        }
      }
      else
        norm_count++;
      blob_box = blob->bounding_box ();
      blob_size =
        blob_box.width () >
        blob_box.height ()? blob_box.width () : blob_box.height ();
      if (blob_size >= textord_noise_sizelimit * row->x_height ()
      && blob_size < row->x_height () * 2) {
        trans_threshold = blob_size / textord_noise_sizefraction;
        trans_count = blob->count_transitions (trans_threshold);
        if (trans_count < textord_noise_translimit)
          norm_count++;
      }
      else if (blob_box.height () > row->x_height () * 2
        && (!word_it.at_first () || !blob_it.at_first ()))
        dot_count += 2;
    }
    if (dot_count > 2) {
      if (dot_count > norm_count * textord_noise_normratio * 2)
        word_dud[word_index] = 2;
      else if (dot_count > norm_count * textord_noise_normratio)
        word_dud[word_index] = 1;
      else
        word_dud[word_index] = 0;
    }
    else
      word_dud[word_index] = 0;
    if (word_dud[word_index] == 2)
      dud_words++;
    else
      ok_words++;
    word_index++;
  }

  word_index = 0;
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    if (word_dud[word_index] == 2
    || (word_dud[word_index] == 1 && dud_words > ok_words)) {
      word = word_it.data ();    //current word
                                 //rejected blobs
      blob_it.set_to_list (word->rej_cblob_list ());
                                 //move from blobs
      blob_it.add_list_after (word->cblob_list ());
    }
    word_index++;
  }
  free_mem(word_dud);
}