void cleanup_blocks(                    //remove empties
                    BLOCK_LIST *blocks  //list
                   ) {
  BLOCK_IT block_it = blocks;    //iterator
  ROW_IT row_it;                 //row iterator

  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
  block_it.forward ()) {
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      if (textord_noise_rejrows
        && !row_it.data ()->word_list ()->empty ()
        && clean_noise_from_row (row_it.data ())
        || row_it.data ()->word_list ()->empty ())
        delete row_it.extract ();//lose empty row
      else {
        if (textord_noise_rejwords)
          clean_noise_from_words (row_it.data ());
        if (textord_blshift_maxshift >= 0)
          tweak_row_baseline (row_it.data ());
      }
    }
    if (block_it.data ()->row_list ()->empty ()) {
      delete block_it.extract ();//lose empty block
    }
  }
}
Exemple #2
0
void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) {
  BLOCK_IT block_it = blocks;    //iterator
  ROW_IT row_it;                 //row iterator

  int num_rows = 0;
  int num_rows_all = 0;
  int num_blocks = 0;
  int num_blocks_all = 0;
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward()) {
    BLOCK* block = block_it.data();
    if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
      cleanup_nontext_block(block);
      continue;
    }
    num_rows = 0;
    num_rows_all = 0;
    if (clean_noise) {
      row_it.set_to_list(block->row_list());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        ROW* row = row_it.data();
        ++num_rows_all;
        clean_small_noise_from_words(row);
        if ((textord_noise_rejrows && !row->word_list()->empty() &&
             clean_noise_from_row(row)) ||
            row->word_list()->empty()) {
          delete row_it.extract();  // lose empty row.
        } else {
          if (textord_noise_rejwords)
            clean_noise_from_words(row_it.data());
          if (textord_blshift_maxshift >= 0)
            tweak_row_baseline(row, textord_blshift_maxshift,
                               textord_blshift_xfraction);
          ++num_rows;
        }
      }
    }
    if (block->row_list()->empty()) {
      delete block_it.extract();  // Lose empty text blocks.
    } else {
      ++num_blocks;
    }
    ++num_blocks_all;
    if (textord_noise_debug)
      tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all);
  }
  if (textord_noise_debug)
    tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all);
}
void Textord::cleanup_blocks(                    //remove empties
                             BLOCK_LIST *blocks  //list
                            ) {
  BLOCK_IT block_it = blocks;    //iterator
  ROW_IT row_it;                 //row iterator

  int num_rows = 0;
  int num_rows_all = 0;
  int num_blocks = 0;
  int num_blocks_all = 0;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    num_rows = 0;
    num_rows_all = 0;
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      ++num_rows_all;
      clean_small_noise_from_words(row_it.data());
      if ((textord_noise_rejrows
           && !row_it.data ()->word_list ()->empty ()
           && clean_noise_from_row (row_it.data ()))
          || row_it.data ()->word_list ()->empty ())
        delete row_it.extract ();//lose empty row
      else {
        if (textord_noise_rejwords)
          clean_noise_from_words (row_it.data ());
        if (textord_blshift_maxshift >= 0)
          tweak_row_baseline(row_it.data(),
                             textord_blshift_maxshift,
                             textord_blshift_xfraction);
        ++num_rows;
      }
    }
    if (block_it.data()->row_list()->empty() &&
        (block_it.data()->poly_block() == NULL ||
         block_it.data()->poly_block()->IsText())) {
      delete block_it.extract();  // Lose empty text blocks but not other types.
    } else {
      ++num_blocks;
    }
    ++num_blocks_all;
    if (textord_noise_debug)
      tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all);
  }
  if (textord_noise_debug)
    tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all);
}