// Helper function to divide the input blobs over noise, small, medium // and large lists. Blobs small in height and (small in width or large in width) // go in the noise list. Dash (-) candidates go in the small list, and // medium and large are by height. // SIDE-EFFECT: reset all blobs to initial state by calling Init(). static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST* src_list, BLOBNBOX_LIST* noise_list, BLOBNBOX_LIST* small_list, BLOBNBOX_LIST* medium_list, BLOBNBOX_LIST* large_list) { BLOBNBOX_IT noise_it(noise_list); BLOBNBOX_IT small_it(small_list); BLOBNBOX_IT medium_it(medium_list); BLOBNBOX_IT large_it(large_list); for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { BLOBNBOX* blob = src_it.extract(); blob->ReInit(); int width = blob->bounding_box().width(); int height = blob->bounding_box().height(); if (height < min_height && (width < min_height || width > max_height)) noise_it.add_after_then_move(blob); else if (height > max_height) large_it.add_after_then_move(blob); else if (height < min_height) small_it.add_after_then_move(blob); else medium_it.add_after_then_move(blob); } }
// Moves blobs that look like they don't sit well on a textline from the // input blobs list to the output small_blobs list. // This gets them away from initial textline finding to stop diacritics // from forming incorrect textlines. (Introduced mainly to fix Thai.) void TextlineProjection::MoveNonTextlineBlobs( BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const { BLOBNBOX_IT it(blobs); BLOBNBOX_IT small_it(small_blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); const TBOX& box = blob->bounding_box(); bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) { blob->ClearNeighbours(); small_it.add_to_end(it.extract()); } } }