Exemplo n.º 1
0
inT32 row_words2(                  //compute space size
                 TO_BLOCK *block,  //block it came from
                 TO_ROW *row,      //row to operate on
                 inT32 maxwidth,   //max expected space size
                 FCOORD rotation,  //for drawing
                 BOOL8 testing_on  //for debug
                ) {
  BOOL8 testing_row;             //contains testpt
  BOOL8 prev_valid;              //if decent size
  BOOL8 this_valid;              //current blob big enough
  inT32 prev_x;                  //end of prev blob
  inT32 min_width;               //min interesting width
  inT32 valid_count;             //good gaps
  inT32 total_count;             //total gaps
  inT32 cluster_count;           //no of clusters
  inT32 prev_count;              //previous cluster_count
  inT32 gap_index;               //which cluster
  inT32 smooth_factor;           //for smoothing stats
  BLOBNBOX *blob;                //current blob
  float lower, upper;            //clustering parameters
  ICOORD testpt;
  TBOX blob_box;                  //bounding box
                                 //iterator
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS gap_stats (0, maxwidth);
                                 //gap sizes
  float gaps[BLOCK_STATS_CLUSTERS];
  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
  //clusters

  testpt = ICOORD (textord_test_x, textord_test_y);
  smooth_factor =
    (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
  //      if (testing_on)
  //              tprintf("Row smooth factor=%d\n",smooth_factor);
  prev_valid = FALSE;
  prev_x = -MAX_INT16;
  testing_row = FALSE;
                                 //min blob size
  min_width = (inT32) block->pr_space;
  total_count = 0;
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    if (!blob->joined_to_prev ()) {
      blob_box = blob->bounding_box ();
      this_valid = blob_box.width () >= min_width;
      this_valid = TRUE;
      if (this_valid && prev_valid
      && blob_box.left () - prev_x < maxwidth) {
        gap_stats.add (blob_box.left () - prev_x, 1);
      }
      total_count++;             //count possibles
      prev_x = blob_box.right ();
      prev_valid = this_valid;
    }
  }
  valid_count = gap_stats.get_total ();
  if (valid_count < total_count * textord_words_minlarge) {
    gap_stats.clear ();
    prev_x = -MAX_INT16;
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
    blob_it.forward ()) {
      blob = blob_it.data ();
      if (!blob->joined_to_prev ()) {
        blob_box = blob->bounding_box ();
        if (blob_box.left () - prev_x < maxwidth) {
          gap_stats.add (blob_box.left () - prev_x, 1);
        }
        prev_x = blob_box.right ();
      }
    }
  }
  if (gap_stats.get_total () == 0) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }

  cluster_count = 0;
  lower = block->xheight * words_initial_lower;
  upper = block->xheight * words_initial_upper;
  gap_stats.smooth (smooth_factor);
  do {
    prev_count = cluster_count;
    cluster_count = gap_stats.cluster (lower, upper,
      textord_spacesize_ratioprop,
      BLOCK_STATS_CLUSTERS, cluster_stats);
  }
  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
  if (cluster_count < 1) {
    row->min_space = 0;
    row->max_nonspace = 0;
    return 0;
  }
  for (gap_index = 0; gap_index < cluster_count; gap_index++)
    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
  //get medians
  if (testing_on) {
    tprintf ("cluster_count=%d:", cluster_count);
    for (gap_index = 0; gap_index < cluster_count; gap_index++)
      tprintf (" %g(%d)", gaps[gap_index],
        cluster_stats[gap_index + 1].get_total ());
    tprintf ("\n");
  }

  //Try to find proportional non-space and space for row.
  for (gap_index = 0; gap_index < cluster_count
    && gaps[gap_index] > block->max_nonspace; gap_index++);
  if (gap_index < cluster_count)
    lower = gaps[gap_index];     //most frequent below
  else {
    if (testing_on)
      tprintf ("No cluster below block threshold!, using default=%g\n",
        block->pr_nonsp);
    lower = block->pr_nonsp;
  }
  for (gap_index = 0; gap_index < cluster_count
    && gaps[gap_index] <= block->max_nonspace; gap_index++);
  if (gap_index < cluster_count)
    upper = gaps[gap_index];     //most frequent above
  else {
    if (testing_on)
      tprintf ("No cluster above block threshold!, using default=%g\n",
        block->pr_space);
    upper = block->pr_space;
  }
  row->min_space =
    (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread);
  row->max_nonspace =
    (inT32) floor (lower + (upper - lower) * textord_words_definite_spread);
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;
  if (testing_on) {
    if (testing_row) {
      tprintf ("GAP STATS\n");
      gap_stats.print (stdout, TRUE);
      tprintf ("SPACE stats\n");
      cluster_stats[2].print (stdout, FALSE);
      tprintf ("NONSPACE stats\n");
      cluster_stats[1].print (stdout, FALSE);
    }
    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
      row->intercept (), row->min_space, upper,
      row->max_nonspace, lower);
  }
  return 1;
}
Exemplo n.º 2
0
#include "mfcpch.h"
#include <string.h>
#ifdef __UNIX__
#include <assert.h>
#endif
#include "coutln.h"
#include <allheaders.h>

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

ELISTIZE (C_OUTLINE)
ICOORD C_OUTLINE::step_coords[4] = {
  ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1)
};

/**********************************************************************
 * C_OUTLINE::C_OUTLINE
 *
 * Constructor to build a C_OUTLINE from a CRACKEDGE LOOP.
 **********************************************************************/

C_OUTLINE::C_OUTLINE (
//constructor
CRACKEDGE * startpt,             //outline to convert
ICOORD bot_left,                 //bounding box
ICOORD top_right, inT16 length   //length of loop
):box (bot_left, top_right), start (startpt->pos) {
  inT16 stepindex;               //index to step
Exemplo n.º 3
0
inT32 row_words(                  //compute space size
                TO_BLOCK *block,  //block it came from
                TO_ROW *row,      //row to operate on
                inT32 maxwidth,   //max expected space size
                FCOORD rotation,  //for drawing
                BOOL8 testing_on  //for debug
               ) {
  BOOL8 testing_row;             //contains testpt
  BOOL8 prev_valid;              //if decent size
  BOOL8 this_valid;              //current blob big enough
  inT32 prev_x;                  //end of prev blob
  inT32 min_gap;                 //min interesting gap
  inT32 cluster_count;           //no of clusters
  inT32 gap_index;               //which cluster
  inT32 smooth_factor;           //for smoothing stats
  BLOBNBOX *blob;                //current blob
  float lower, upper;            //clustering parameters
  float gaps[3];                 //gap clusers
  ICOORD testpt;
  TBOX blob_box;                  //bounding box
                                 //iterator
  BLOBNBOX_IT blob_it = row->blob_list ();
  STATS gap_stats (0, maxwidth);
  STATS cluster_stats[4];        //clusters

  testpt = ICOORD (textord_test_x, textord_test_y);
  smooth_factor =
    (inT32) (block->xheight * textord_wordstats_smooth_factor + 1.5);
  //      if (testing_on)
  //              tprintf("Row smooth factor=%d\n",smooth_factor);
  prev_valid = FALSE;
  prev_x = -MAX_INT32;
  testing_row = FALSE;
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    blob_box = blob->bounding_box ();
    if (blob_box.contains (testpt))
      testing_row = TRUE;
    gap_stats.add (blob_box.width (), 1);
  }
  min_gap = (inT32) floor (gap_stats.ile (textord_words_width_ile));
  gap_stats.clear ();
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    if (!blob->joined_to_prev ()) {
      blob_box = blob->bounding_box ();
      //                      this_valid=blob_box.width()>=min_gap;
      this_valid = TRUE;
      if (this_valid && prev_valid
      && blob_box.left () - prev_x < maxwidth) {
        gap_stats.add (blob_box.left () - prev_x, 1);
      }
      prev_x = blob_box.right ();
      prev_valid = this_valid;
    }
  }
  if (gap_stats.get_total () == 0) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }
  gap_stats.smooth (smooth_factor);
  lower = row->xheight * textord_words_initial_lower;
  upper = row->xheight * textord_words_initial_upper;
  cluster_count = gap_stats.cluster (lower, upper,
    textord_spacesize_ratioprop, 3,
    cluster_stats);
  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
                                 //shrink gap
    upper = (upper * 3 + lower) / 4;
    lower = (lower * 3 + upper) / 4;
    cluster_count = gap_stats.cluster (lower, upper,
      textord_spacesize_ratioprop, 3,
      cluster_stats);
  }
  if (cluster_count < 2) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }
  for (gap_index = 0; gap_index < cluster_count; gap_index++)
    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
  //get medians
  if (cluster_count > 2) {
    if (testing_on && textord_show_initial_words) {
      tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
        row->intercept (),
        cluster_stats[1].ile (0.5),
        cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
    }
    lower = gaps[0];
    if (gaps[1] > lower) {
      upper = gaps[1];           //prefer most frequent
      if (upper < block->xheight * textord_words_min_minspace
      && gaps[2] > gaps[1]) {
        upper = gaps[2];
      }
    }
    else if (gaps[2] > lower
      && gaps[2] >= block->xheight * textord_words_min_minspace)
      upper = gaps[2];
    else if (lower >= block->xheight * textord_words_min_minspace) {
      upper = lower;             //not nice
      lower = gaps[1];
      if (testing_on && textord_show_initial_words) {
        tprintf ("Had to switch most common from lower to upper!!\n");
        gap_stats.print (stdout, TRUE);
      }
    }
    else {
      row->min_space = 0;        //no evidence
      row->max_nonspace = 0;
      return 0;
    }
  }
  else {
    if (gaps[1] < gaps[0]) {
      if (testing_on && textord_show_initial_words) {
        tprintf ("Had to switch most common from lower to upper!!\n");
        gap_stats.print (stdout, TRUE);
      }
      lower = gaps[1];
      upper = gaps[0];
    }
    else {
      upper = gaps[1];
      lower = gaps[0];
    }
  }
  if (upper < block->xheight * textord_words_min_minspace) {
    row->min_space = 0;          //no evidence
    row->max_nonspace = 0;
    return 0;
  }
  if (upper * 3 < block->min_space * 2 + block->max_nonspace
  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
    if (testing_on && textord_show_initial_words) {
      tprintf ("Disagreement between block and row at %g!!\n",
        row->intercept ());
      tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
      gap_stats.print (stdout, TRUE);
    }
  }
  row->min_space =
    (inT32) ceil (upper - (upper - lower) * textord_words_definite_spread);
  row->max_nonspace =
    (inT32) floor (lower + (upper - lower) * textord_words_definite_spread);
  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
  row->space_size = upper;
  row->kern_size = lower;
  if (testing_on && textord_show_initial_words) {
    if (testing_row) {
      tprintf ("GAP STATS\n");
      gap_stats.print (stdout, TRUE);
      tprintf ("SPACE stats\n");
      cluster_stats[2].print (stdout, FALSE);
      tprintf ("NONSPACE stats\n");
      cluster_stats[1].print (stdout, FALSE);
    }
    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
      row->intercept (), row->min_space, upper,
      row->max_nonspace, lower);
  }
  return cluster_stats[2].get_total ();
}
Exemplo n.º 4
0
void BLOBNBOX::chop(                        //chop blobs
                    BLOBNBOX_IT *start_it,  //location of this
                    BLOBNBOX_IT *end_it,    //iterator
                    FCOORD rotation,        //for landscape
                    float xheight           //of line
                   ) {
  inT16 blobcount;               //no of blobs
  BLOBNBOX *newblob;             //fake blob
  BLOBNBOX *blob;                //current blob
  inT16 blobindex;               //number of chop
  inT16 leftx;                   //left edge of blob
  float blobwidth;               //width of each
  float rightx;                  //right edge to scan
  float ymin, ymax;              //limits of new blob
  float test_ymin, test_ymax;    //limits of part blob
  ICOORD bl, tr;                 //corners of box
  BLOBNBOX_IT blob_it;           //blob iterator

                                 //get no of chops
  blobcount = (inT16) floor (box.width () / xheight);
  if (blobcount > 1 && (blob_ptr != NULL || cblob_ptr != NULL)) {
                                 //width of each
    blobwidth = (float) (box.width () + 1) / blobcount;
    for (blobindex = blobcount - 1, rightx = box.right ();
    blobindex >= 0; blobindex--, rightx -= blobwidth) {
      ymin = (float) MAX_INT32;
      ymax = (float) -MAX_INT32;
      blob_it = *start_it;
      do {
        blob = blob_it.data ();
        if (blob->blob_ptr != NULL)
          find_blob_limits (blob->blob_ptr, rightx - blobwidth, rightx,
            rotation, test_ymin, test_ymax);
        else
          find_cblob_vlimits (blob->cblob_ptr, rightx - blobwidth,
            rightx,
            /*rotation, */ test_ymin, test_ymax);
        blob_it.forward ();
        if (test_ymin < ymin)
          ymin = test_ymin;
        if (test_ymax > ymax)
          ymax = test_ymax;
      }
      while (blob != end_it->data ());
      if (ymin < ymax) {
        leftx = (inT16) floor (rightx - blobwidth);
        if (leftx < box.left ())
          leftx = box.left ();   //clip to real box
        bl = ICOORD (leftx, (inT16) floor (ymin));
        tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax));
        if (blobindex == 0)
          box = TBOX (bl, tr);    //change box
        else {
          newblob = new BLOBNBOX;
                                 //box is all it has
          newblob->box = TBOX (bl, tr);
                                 //stay on current
          end_it->add_after_stay_put (newblob);
        }
      }
    }
  }
}