void cleanup_blocks( //remove empties BLOCK_LIST *blocks //list ) { BLOCK_IT block_it = blocks; //iterator ROW_IT row_it; //row iterator for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { row_it.set_to_list (block_it.data ()->row_list ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { if (textord_noise_rejrows && !row_it.data ()->word_list ()->empty () && clean_noise_from_row (row_it.data ()) || row_it.data ()->word_list ()->empty ()) delete row_it.extract ();//lose empty row else { if (textord_noise_rejwords) clean_noise_from_words (row_it.data ()); if (textord_blshift_maxshift >= 0) tweak_row_baseline (row_it.data ()); } } if (block_it.data ()->row_list ()->empty ()) { delete block_it.extract ();//lose empty block } } }
void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) { BLOCK_IT block_it = blocks; //iterator ROW_IT row_it; //row iterator int num_rows = 0; int num_rows_all = 0; int num_blocks = 0; int num_blocks_all = 0; for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { cleanup_nontext_block(block); continue; } num_rows = 0; num_rows_all = 0; if (clean_noise) { row_it.set_to_list(block->row_list()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { ROW* row = row_it.data(); ++num_rows_all; clean_small_noise_from_words(row); if ((textord_noise_rejrows && !row->word_list()->empty() && clean_noise_from_row(row)) || row->word_list()->empty()) { delete row_it.extract(); // lose empty row. } else { if (textord_noise_rejwords) clean_noise_from_words(row_it.data()); if (textord_blshift_maxshift >= 0) tweak_row_baseline(row, textord_blshift_maxshift, textord_blshift_xfraction); ++num_rows; } } } if (block->row_list()->empty()) { delete block_it.extract(); // Lose empty text blocks. } else { ++num_blocks; } ++num_blocks_all; if (textord_noise_debug) tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all); } if (textord_noise_debug) tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all); }
void Textord::cleanup_blocks( //remove empties BLOCK_LIST *blocks //list ) { BLOCK_IT block_it = blocks; //iterator ROW_IT row_it; //row iterator int num_rows = 0; int num_rows_all = 0; int num_blocks = 0; int num_blocks_all = 0; for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { num_rows = 0; num_rows_all = 0; row_it.set_to_list (block_it.data ()->row_list ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { ++num_rows_all; clean_small_noise_from_words(row_it.data()); if ((textord_noise_rejrows && !row_it.data ()->word_list ()->empty () && clean_noise_from_row (row_it.data ())) || row_it.data ()->word_list ()->empty ()) delete row_it.extract ();//lose empty row else { if (textord_noise_rejwords) clean_noise_from_words (row_it.data ()); if (textord_blshift_maxshift >= 0) tweak_row_baseline(row_it.data(), textord_blshift_maxshift, textord_blshift_xfraction); ++num_rows; } } if (block_it.data()->row_list()->empty() && (block_it.data()->poly_block() == NULL || block_it.data()->poly_block()->IsText())) { delete block_it.extract(); // Lose empty text blocks but not other types. } else { ++num_blocks; } ++num_blocks_all; if (textord_noise_debug) tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all); } if (textord_noise_debug) tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all); }