// Remove outlines that are a tiny fraction in either width or height // of the word height. void Textord::clean_small_noise_from_words(ROW *row) { WERD_IT word_it(row->word_list()); for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { WERD* word = word_it.data(); int min_size = static_cast<int>( textord_noise_hfract * word->bounding_box().height() + 0.5); C_BLOB_IT blob_it(word->cblob_list()); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); C_OUTLINE_IT out_it(blob->out_list()); for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { C_OUTLINE* outline = out_it.data(); outline->RemoveSmallRecursive(min_size, &out_it); } if (blob->out_list()->empty()) { delete blob_it.extract(); } } if (word->cblob_list()->empty()) { if (!word_it.at_last()) { // The next word is no longer a fuzzy non space if it was before, // since the word before is about to be deleted. WERD* next_word = word_it.data_relative(1); if (next_word->flag(W_FUZZY_NON)) { next_word->set_flag(W_FUZZY_NON, false); } } delete word_it.extract(); } } }
// If this outline is smaller than the given min_size, delete this and // remove from its list, via *it, after checking that *it points to this. // Otherwise, if any children of this are too small, delete them. // On entry, *it must be an iterator pointing to this. If this gets deleted // then this is extracted from *it, so an iteration can continue. void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) { if (box.width() < min_size || box.height() < min_size) { ASSERT_HOST(this == it->data()); delete it->extract(); // Too small so get rid of it and any children. } else if (!children.empty()) { // Search the children of this, deleting any that are too small. C_OUTLINE_IT child_it(&children); for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { C_OUTLINE* child = child_it.data(); child->RemoveSmallRecursive(min_size, &child_it); } } }