Example #1
0
// Return true if this vector is the same side, overlaps, and close
// enough to the other to be merged.
bool TabVector::SimilarTo(const ICOORD& vertical,
                          const TabVector& other, BlobGrid* grid) const {
  if ((IsRightTab() && other.IsRightTab()) ||
      (IsLeftTab() && other.IsLeftTab())) {
    // If they don't overlap, at least in extensions, then there is no chance.
    if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
      return false;
    // A fast approximation to the scale factor of the sort_key_.
    int v_scale = abs(vertical.y());
    if (v_scale == 0)
      v_scale = 1;
    // If they are close enough, then OK.
    if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
        sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
      return true;
    // Ragged tabs get a bigger threshold.
    if (!IsRagged() || !other.IsRagged() ||
        sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
        sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
      return false;
    if (grid == NULL) {
      // There is nothing else to test!
      return true;
    }
    // If there is nothing in the rectangle between the vector that is going to
    // move, and the place it is moving to, then they can be merged.
    // Setup a vertical search for any blob.
    const TabVector* mover = (IsRightTab() &&
       sort_key_ < other.sort_key_) ? this : &other;
    int top_y = mover->endpt_.y();
    int bottom_y = mover->startpt_.y();
    int left = MIN(mover->XAtY(top_y), mover->XAtY(bottom_y));
    int right = MAX(mover->XAtY(top_y), mover->XAtY(bottom_y));
    int shift = abs(sort_key_ - other.sort_key_) / v_scale;
    if (IsRightTab()) {
      right += shift;
    } else {
      left -= shift;
    }

    GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
    vsearch.StartVerticalSearch(left, right, top_y);
    BLOBNBOX* blob;
    while ((blob = vsearch.NextVerticalSearch(true)) != NULL) {
      TBOX box = blob->bounding_box();
      if (box.top() > bottom_y)
        return true;  // Nothing found.
      if (box.bottom() < top_y)
        continue;  // Doesn't overlap.
      int left_at_box = XAtY(box.bottom());
      int right_at_box = left_at_box;
      if (IsRightTab())
        right_at_box += shift;
      else
        left_at_box -= shift;
      if (MIN(right_at_box, box.right()) > MAX(left_at_box, box.left()))
        return false;
    }
    return true;  // Nothing found.
  }
  return false;
}
Example #2
0
/*************************************************************************
 * transform_to_next_perm()
 * Examines the current word list to find the smallest word gap size. Then walks
 * the word list closing any gaps of this size by either inserting new
 * combination words, or extending existing ones.
 *
 * Words flagged part_of_combo are skipped in both passes.
 *
 * The routine COULD be limited to stop it building words longer than N blobs.
 *
 * If there are no more gaps then it DELETES the entire list and returns the
 * empty list to cause termination.
 *************************************************************************/
void transform_to_next_perm(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT prev_word_it(&words);   // trails word_it; left-hand word of a join
  WERD_RES *word;
  WERD_RES *prev_word;
  WERD_RES *combo;
  WERD *copy_word;
  inT16 prev_right = -1;         // right edge of previous word; <0 = none yet
  TBOX box;
  inT16 gap;
  inT16 min_gap = MAX_INT16;     // MAX_INT16 means "no gap measured"

  // Pass 1: find the smallest horizontal gap between successive words.
  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
    word = word_it.data ();
    if (!word->part_of_combo) {
      box = word->word->bounding_box ();
      if (prev_right >= 0) {
        gap = box.left () - prev_right;
        if (gap < min_gap)
          min_gap = gap;
      }
      prev_right = box.right ();
    }
  }
  if (min_gap < MAX_INT16) {
    // Pass 2: join every pair of words separated by no more than min_gap.
    prev_right = -1;             //back to start
    word_it.set_to_list (&words);
    // Can't use the cycle point here because combos may be inserted at the
    // start of the list; stop when the iterator is back at the first element
    // after at least one word has been processed.
    for (;
    (prev_right < 0) || !word_it.at_first (); word_it.forward ()) {
      word = word_it.data ();
      if (!word->part_of_combo) {
        box = word->word->bounding_box ();
        if (prev_right >= 0) {
          gap = box.left () - prev_right;
          if (gap <= min_gap) {
            prev_word = prev_word_it.data ();
            if (prev_word->combination)
              combo = prev_word;   // extend the existing combination
            else {
              /* Make a new combination and insert before the first word being joined */
              copy_word = new WERD;
              *copy_word = *(prev_word->word);
              //deep copy
              combo = new WERD_RES (copy_word);
              combo->combination = TRUE;
              combo->x_height = prev_word->x_height;
              prev_word->part_of_combo = TRUE;
              prev_word_it.add_before_then_move (combo);
            }
            // The combo takes its end-of-line flag from the word being joined.
            combo->word->set_flag (W_EOL, word->word->flag (W_EOL));
            if (word->combination) {
              // Current word is itself a combo: move its blobs into combo,
              // then remove and delete the old combo, which is no longer
              // needed.
              combo->word->join_on (word->word);
              delete word_it.extract ();
            }
            else {
              // Copy the current word onto the combo and mark it as absorbed.
              combo->copy_on (word);
              word->part_of_combo = TRUE;
            }
            // The combo has changed, so discard any previous results on it.
            combo->done = FALSE;
            if (combo->outword != NULL) {
              delete combo->outword;
              delete combo->best_choice;
              delete combo->raw_choice;
              combo->outword = NULL;
              combo->best_choice = NULL;
              combo->raw_choice = NULL;
            }
          }
          else
            // Gap too big to join: bring the trailing iterator up to the
            // current word.
              prev_word_it = word_it;
        }
        // box was copied before any extract above, so this is still valid.
        prev_right = box.right ();
      }
    }
  }
  else
    words.clear ();              //signal termination
}
Example #3
0
/*************************************************************************
 * uniformly_spaced()
 * Decide whether the gaps between the blobs of a word look like ordinary
 * character spacing. Gaps smaller than the largest gap are collected in a
 * histogram; the word is accepted when any of the following hold:
 *    - no gap smaller than the largest was recorded
 *    - the largest gap is no larger than the row's non-space limit,
 *      (space + 3 * kern) / 4, rescaled by bln_x_height / x_height
 *    - more than 2 smaller gaps were recorded and the largest gap is no
 *      larger than twice their median
 *    - at most 2 smaller gaps were recorded and the largest gap is no larger
 *      than twice their mean
 * With fixsp_ignore_punct set, a gap is not measured when the character on
 * either side of it is one of the punctuation characters listed below.
 * **** REMEMBER - WE'RE NOW WORKING WITH A BLN WERD !!!
 *************************************************************************/
BOOL8 uniformly_spaced(  //sensible word
                       WERD_RES *word) {
  PBLOB_IT blob_it;
  TBOX blob_box;
  inT16 last_right = -MAX_INT16;   // -MAX_INT16 = no blob seen yet
  inT16 gap;
  inT16 widest = -MAX_INT16;       // largest gap so far
  inT16 widest_count = 0;          // how many gaps equal widest
  STATS gap_stats (0, MAXSPACING); // gaps strictly below widest
  BOOL8 result;
  const ROW *row = word->denorm.row ();
  float max_non_space;
  float normalised_limit;
  inT16 blob_index = 0;            // index of the current blob/character
  inT16 char_offset = 0;           // offset of current char in the string
  STRING punct_chars = "\"`',.:;";

  blob_it.set_to_list (word->outword->blob_list ());

  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob_box = blob_it.data ()->bounding_box ();

    // A gap exists only from the second blob onwards, and may be vetoed by
    // punctuation on either side of it.
    bool measure = last_right > -MAX_INT16;
    if (measure && fixsp_ignore_punct) {
      measure =
        !punct_chars.contains (word->best_choice->string ()
          [char_offset - word->best_choice->lengths()[blob_index - 1]]) &&
        !punct_chars.contains (word->best_choice->string ()[char_offset]);
    }

    if (measure) {
      gap = blob_box.left () - last_right;
      if (gap > widest) {
        // New maximum: the displaced maximum joins the histogram.
        if (widest_count > 0)
          gap_stats.add (widest, widest_count);
        widest = gap;
        widest_count = 1;
      }
      else if (gap == widest) {
        widest_count++;
      }
      else {
        gap_stats.add (gap, 1);
      }
    }

    last_right = blob_box.right ();
    char_offset += word->best_choice->lengths()[blob_index];
    blob_index++;
  }

  max_non_space = (row->space () + 3 * row->kern ()) / 4;
  normalised_limit = max_non_space * bln_x_height / row->x_height ();

  if (gap_stats.get_total () == 0)
    result = TRUE;
  else if (widest <= normalised_limit)
    result = TRUE;
  else if (gap_stats.get_total () > 2)
    result = (widest <= 2 * gap_stats.median ());
  else
    result = (widest <= 2 * gap_stats.mean ());

  #ifndef SECURE_NAMES
  if (debug_fix_space_level > 1) {
    if (result) {
      tprintf
        ("ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n",
        word->best_choice->string ().string (), normalised_limit,
        widest, widest_count, gap_stats.get_total (), gap_stats.mean (),
        gap_stats.median ());
    }
    else {
      tprintf
        ("REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n",
        word->best_choice->string ().string (), normalised_limit,
        widest, widest_count, gap_stats.get_total (), gap_stats.mean (),
        gap_stats.median ());
    }
  }
  #endif

  return result;
}
Example #4
0
// Returns true when each of the four edges of this box is within tolerance
// (inclusive) of the matching edge of box.
bool TBOX::almost_equal(const TBOX &box, int tolerance) const {
  if (abs(left() - box.left()) > tolerance)
    return false;
  if (abs(right() - box.right()) > tolerance)
    return false;
  if (abs(top() - box.top()) > tolerance)
    return false;
  return abs(bottom() - box.bottom()) <= tolerance;
}
Example #5
0
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used.
void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
                          const TBOX& bbox,
                          float* min_xht, float* max_xht, float* yshift) const {
  // Default return -- accept anything.
  *yshift = 0.0f;
  *min_xht = 0.0f;
  *max_xht = MAX_FLOAT32;

  if (!unicharset.top_bottom_useful())
    return;

  // Clip the top and bottom to the limit of normalized feature space.
  int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
  int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
  // A tolerance of yscale corresponds to 1 pixel in the image.
  double tolerance = y_scale();
  // If the script doesn't have upper and lower-case characters, widen the
  // tolerance to allow sloppy baseline/x-height estimates.
  if (!unicharset.script_has_upper_lower())
    tolerance = y_scale() * kSloppyTolerance;

  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                            &min_top, &max_top);

  // Calculate the scale factor we'll use to get to image y-pixels
  double midx = (bbox.left() + bbox.right()) / 2.0;
  double ydiff = (bbox.top() - bbox.bottom()) + 2.0;
  FCOORD mid_bot(midx, bbox.bottom()), tmid_bot;
  FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high;
  DenormTransform(NULL, mid_bot, &tmid_bot);
  DenormTransform(NULL, mid_high, &tmid_high);

  // bln_y_measure * yscale = image_y_measure
  double yscale = tmid_high.pt_to_pt_dist(tmid_bot) / ydiff;

  // Calculate y-shift
  int bln_yshift = 0, bottom_shift = 0, top_shift = 0;
  if (bottom < min_bottom - tolerance) {
    bottom_shift = bottom - min_bottom;
  } else if (bottom > max_bottom + tolerance) {
    bottom_shift = bottom - max_bottom;
  }
  if (top < min_top - tolerance) {
    top_shift = top - min_top;
  } else if (top > max_top + tolerance) {
    top_shift = top - max_top;
  }
  if ((top_shift >= 0 && bottom_shift > 0) ||
      (top_shift < 0 && bottom_shift < 0)) {
    bln_yshift = (top_shift + bottom_shift) / 2;
  }
  *yshift = bln_yshift * yscale;

  // To help very high cap/xheight ratio fonts accept the correct x-height,
  // and to allow the large caps in small caps to accept the xheight of the
  // small caps, add kBlnBaselineOffset to chars with a maximum max, and have
  // a top already at a significantly high position.
  if (max_top == kBlnCellHeight - 1 &&
      top > kBlnCellHeight - kBlnBaselineOffset / 2)
    max_top += kBlnBaselineOffset;
  top -= bln_yshift;
  int height = top - kBlnBaselineOffset;
  double min_height = min_top - kBlnBaselineOffset - tolerance;
  double max_height = max_top - kBlnBaselineOffset + tolerance;

  // We shouldn't try calculations if the characters are very short (for example
  // for punctuation).
  if (min_height > kBlnXHeight / 8 && height > 0) {
    float result = height * kBlnXHeight * yscale / min_height;
    *max_xht = result + kFinalPixelTolerance;
    result = height * kBlnXHeight * yscale / max_height;
    *min_xht = result - kFinalPixelTolerance;
  }
}
// Builds a PIXROW for every blob of word and fits it to the pixels of
// bin_image. On return pix_box holds the region that was cut from the image,
// imlines holds the image lines produced by generate_imlines() for that
// region, and pixrow_list points at a list allocated here with new and
// handed back through the reference.
void char_clip_word(                            //
                    WERD *word,                 //word to be processed
                    IMAGE &bin_image,           //whole image
                    PIXROW_LIST *&pixrow_list,  //pixrows built
                    IMAGELINE *&imlines,        //lines cut from image
                    TBOX &pix_box                //box defining imlines
                   ) {
  TBOX word_box = word->bounding_box ();
  // Inverse words use 1 as the foreground colour, all others 0.
  inT16 foreground_colour = word->flag (W_INVERSE) ? 1 : 0;

  /* Region for maximum pixrow expansion: the word box plus a margin. */
  pix_box = word_box;
  pix_box.move_bottom_edge (-pix_word_margin);
  pix_box.move_top_edge (pix_word_margin);
  pix_box.move_left_edge (-pix_word_margin);
  pix_box.move_right_edge (pix_word_margin);
  // NOTE(review): presumably this restricts pix_box to the image area less
  // BUG_OFFSET at top and bottom -- confirm TBOX::operator-= semantics.
  pix_box -= TBOX (ICOORD (0, 0 + BUG_OFFSET),
    ICOORD (bin_image.get_xsize (),
    bin_image.get_ysize () - BUG_OFFSET));

  /* One pixrow per blob, all sharing the same vertical extent. */
  inT16 pix_offset = pix_box.bottom ();   //Y pos of pixrow[0]
  inT16 row_height = pix_box.height ();   //No of pix rows
  PBLOB_LIST *blob_list = word->blob_list ();
  PBLOB_IT blob_it;
  blob_it.set_to_list (blob_list);

  pixrow_list = new PIXROW_LIST;
  PIXROW_IT pixrow_it;
  pixrow_it.set_to_list (pixrow_list);

  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    PIXROW *fresh_row = new PIXROW (pix_offset, row_height, blob_it.data ());
    ASSERT_HOST (!fresh_row->
      bad_box (bin_image.get_xsize (), bin_image.get_ysize ()));
    pixrow_it.add_after_then_move (fresh_row);
  }

  imlines = generate_imlines (bin_image, pix_box);

  /* Contract pixrows - shrink min and max back to black pixels */
  inT16 imlines_x_offset = pix_box.left ();

  pixrow_it.move_to_first ();
  for (pixrow_it.mark_cycle_pt ();
       !pixrow_it.cycled_list (); pixrow_it.forward ()) {
    PIXROW *shrinking = pixrow_it.data ();
    ASSERT_HOST (!shrinking->
      bad_box (bin_image.get_xsize (), bin_image.get_ysize ()));
    shrinking->contract (imlines, imlines_x_offset, foreground_colour);
    ASSERT_HOST (!shrinking->
      bad_box (bin_image.get_xsize (), bin_image.get_ysize ()));
  }

  /* Expand pixrows iteratively 1 pixel at a time, until a full sweep over
     the list leaves every pixrow unchanged. */
  BOOL8 changed;                 //any pixrow grew during this sweep
  inT16 iteration_count = 0;     //sweeps done; not read by this function
  do {
    changed = FALSE;
    pixrow_it.move_to_first ();
    // prev/current/next slide along the list so that each pixrow is
    // extended with its neighbours on either side as limits.
    PIXROW *prev = NULL;
    PIXROW *current = NULL;
    PIXROW *next = pixrow_it.data ();
    for (pixrow_it.mark_cycle_pt ();
         !pixrow_it.cycled_list (); pixrow_it.forward ()) {
      prev = current;
      current = next;
      next = pixrow_it.at_last () ? NULL : pixrow_it.data_relative (1);
      BOOL8 just_changed = current->extend (imlines, pix_box, prev, next,
        foreground_colour);
      ASSERT_HOST (!current->
        bad_box (bin_image.get_xsize (),
        bin_image.get_ysize ()));
      if (just_changed)
        changed = TRUE;
    }
    iteration_count++;
  }
  while (changed);
}
Example #7
0
// Collects edges into the given bounding box, LLSQ accumulator and/or x_coords,
// y_coords vectors.
// For a description of x_coords/y_coords, see GetEdgeCoords above.
// Startpt to lastpt, inclusive, MUST have the same src_outline member,
// which may be NULL. The vector from lastpt to its next is included in
// the accumulation. Hidden edges should be excluded by the caller.
// The input denorm should be the normalizations that have been applied from
// the image to the current state of the TBLOB from which startpt, lastpt come.
// box is the bounding box of the blob from which the EDGEPTs are taken and
// indices into x_coords, y_coords are offset by box.botleft().
static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt,
                              const DENORM& denorm, const TBOX& box,
                              TBOX* bounding_box,
                              LLSQ* accumulator,
                              GenericVector<GenericVector<int> > *x_coords,
                              GenericVector<GenericVector<int> > *y_coords) {
  const C_OUTLINE* outline = startpt->src_outline;
  int x_limit = box.width() - 1;
  int y_limit = box.height() - 1;
  if (outline != NULL) {
    // Use higher-resolution edge points stored on the outline.
    // The outline coordinates may not match the binary image because of the
    // rotation for vertical text lines, but the root_denorm IS the matching
    // start of the DENORM chain.
    const DENORM* root_denorm = denorm.RootDenorm();
    int step_length = outline->pathlength();
    int start_index = startpt->start_step;
    // Note that if this run straddles the wrap-around point of the outline,
    // that lastpt->start_step may have a lower index than startpt->start_step,
    // and we want to use an end_index that allows us to use a positive
    // increment, so we add step_length if necessary, but that may be beyond the
    // bounds of the outline steps/ due to wrap-around, so we use % step_length
    // everywhere, except for start_index.
    int end_index = lastpt->start_step + lastpt->step_count;
    if (end_index <= start_index)
      end_index += step_length;
    // pos is the integer coordinates of the binary image steps.
    ICOORD pos = outline->position_at_index(start_index);
    FCOORD origin(box.left(), box.bottom());
    // f_pos is a floating-point version of pos that offers improved edge
    // positioning using greyscale information or smoothing of edge steps.
    FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index);
    // pos_normed is f_pos after the appropriate normalization, and relative
    // to origin.
    // prev_normed is the previous value of pos_normed.
    FCOORD prev_normed;
    denorm.NormTransform(root_denorm, f_pos, &prev_normed);
    prev_normed -= origin;
    for (int index = start_index; index < end_index; ++index) {
      ICOORD step = outline->step(index % step_length);
      // Only use the point if its edge strength is positive. This excludes
      // points that don't provide useful information, eg
      // ___________
      //            |___________
      // The vertical step provides only noisy, damaging information, as even
      // with a greyscale image, the positioning of the edge there may be a
      // fictitious extrapolation, so previous processing has eliminated it.
      if (outline->edge_strength_at_index(index % step_length) > 0) {
        FCOORD f_pos = outline->sub_pixel_pos_at_index(pos,
                                                       index % step_length);
        FCOORD pos_normed;
        denorm.NormTransform(root_denorm, f_pos, &pos_normed);
        pos_normed -= origin;
        // Accumulate the information that is selected by the caller.
        if (bounding_box != NULL) {
          SegmentBBox(pos_normed, prev_normed, bounding_box);
        }
        if (accumulator != NULL) {
          SegmentLLSQ(pos_normed, prev_normed, accumulator);
        }
        if (x_coords != NULL && y_coords != NULL) {
          SegmentCoords(pos_normed, prev_normed, x_limit, y_limit,
                        x_coords, y_coords);
        }
        prev_normed = pos_normed;
      }
      pos += step;
    }
  } else {
    // There is no outline, so we are forced to use the polygonal approximation.
    const EDGEPT* endpt = lastpt->next;
    const EDGEPT* pt = startpt;
    do {
      FCOORD next_pos(pt->next->pos.x - box.left(),
                      pt->next->pos.y - box.bottom());
      FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom());
      if (bounding_box != NULL) {
        SegmentBBox(next_pos, pos, bounding_box);
      }
      if (accumulator != NULL) {
        SegmentLLSQ(next_pos, pos, accumulator);
      }
      if (x_coords != NULL && y_coords != NULL) {
        SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords);
      }
    } while ((pt = pt->next) != endpt);
  }
}
// Tries to grow the [min, max] pixel range of each row of this pixrow by one
// step: one pixel to the left and to the right on the row itself, and by
// widening the ranges of the rows directly below and above onto foreground
// pixels that the current row's range covers.
// imlines supplies the pixel rows; their pixel arrays are indexed relative
// to imbox.left(). prev and next, when non-NULL, are the neighbouring
// pixrows whose non-empty ranges limit how far this one may spread.
// Returns TRUE if any min or max value was changed.
BOOL8 PIXROW::extend(               //image array
                     IMAGELINE *imlines,
                     TBOX &imbox,
                     PIXROW *prev,  //for prev blob
                     PIXROW *next,  //for next blob
                     inT16 foreground_colour) {
  inT16 i;
  inT16 x_offset = imbox.left ();
  inT16 limit;
  inT16 left_limit;
  inT16 right_limit;
  uinT8 *pixels = NULL;
  uinT8 *pixels_below = NULL;    //row below current
  uinT8 *pixels_above = NULL;    //row above current
  BOOL8 changed = FALSE;

  pixels_above = imlines[0].pixels;
  for (i = 0; i < row_count; i++) {
    // Slide the three-row window up by one: pixels_below is NULL on the
    // first row and pixels_above is NULL on the last row.
    pixels_below = pixels;
    pixels = pixels_above;
    if (i < (row_count - 1))
      pixels_above = imlines[i + 1].pixels;
    else
      pixels_above = NULL;

    /* Extend Left by one pixel*/
    // The limit is the image box edge, or just right of prev's range on
    // this row when prev has a non-empty range here.
    if (prev == NULL || prev->max[i] < prev->min[i])
      limit = imbox.left ();
    else
      limit = prev->max[i] + 1;
    // Only non-empty rows grow, and only onto a foreground pixel.
    if ((min[i] <= max[i]) &&
      (min[i] > limit) &&
    (pixels[min[i] - 1 - x_offset] == foreground_colour)) {
      min[i]--;
      changed = TRUE;
    }

    /* Extend Right by one pixel*/
    // Mirror image of the left case, limited by next's range on this row.
    if (next == NULL || next->min[i] > next->max[i])
      limit = imbox.right () - 1;//-1 to index inside pix
    else
      limit = next->min[i] - 1;
    if ((min[i] <= max[i]) &&
      (max[i] < limit) &&
    (pixels[max[i] + 1 - x_offset] == foreground_colour)) {
      max[i]++;
      changed = TRUE;
    }

    /* Extend down by one row */
    // Works on row i - 1: choose the candidate span [left_limit, right_limit]
    // from this row's range and the range of the row below, then shrink it
    // inwards to the outermost foreground pixels of the row below.
    if (pixels_below != NULL) {
      if (min[i] < min[i - 1]) { //row goes left of row below
        if (prev == NULL || prev->max[i - 1] < prev->min[i - 1])
          left_limit = min[i];
        else
          left_limit = LARGEST (min[i], prev->max[i - 1] + 1);
      }
      else
        left_limit = min[i - 1];

      if (max[i] > max[i - 1]) { //row goes right of row below
        if (next == NULL || next->min[i - 1] > next->max[i - 1])
          right_limit = max[i];
        else
          right_limit = SMALLEST (max[i], next->min[i - 1] - 1);
      }
      else
        right_limit = max[i - 1];

      while ((left_limit <= right_limit) &&
        (pixels_below[left_limit - x_offset] != foreground_colour))
        left_limit++;            //find black extremity

      if ((left_limit <= right_limit) && (left_limit < min[i - 1])) {
        min[i - 1] = left_limit; //widen left if poss
        changed = TRUE;
      }

      while ((left_limit <= right_limit) &&
        (pixels_below[right_limit - x_offset] != foreground_colour))
        right_limit--;           //find black extremity

      if ((left_limit <= right_limit) && (right_limit > max[i - 1])) {
        max[i - 1] = right_limit;//widen right if poss
        changed = TRUE;
      }
    }

    /* Extend up by one row */
    // Same procedure as extending down, applied to row i + 1.
    if (pixels_above != NULL) {
      if (min[i] < min[i + 1]) { //row goes left of row above
        if (prev == NULL || prev->min[i + 1] > prev->max[i + 1])
          left_limit = min[i];
        else
          left_limit = LARGEST (min[i], prev->max[i + 1] + 1);
      }
      else
        left_limit = min[i + 1];

      if (max[i] > max[i + 1]) { //row goes right of row above
        if (next == NULL || next->min[i + 1] > next->max[i + 1])
          right_limit = max[i];
        else
          right_limit = SMALLEST (max[i], next->min[i + 1] - 1);
      }
      else
        right_limit = max[i + 1];

      while ((left_limit <= right_limit) &&
        (pixels_above[left_limit - x_offset] != foreground_colour))
        left_limit++;            //find black extremity

      if ((left_limit <= right_limit) && (left_limit < min[i + 1])) {
        min[i + 1] = left_limit; //widen left if poss
        changed = TRUE;
      }

      while ((left_limit <= right_limit) &&
        (pixels_above[right_limit - x_offset] != foreground_colour))
        right_limit--;           //find black extremity

      if ((left_limit <= right_limit) && (right_limit > max[i + 1])) {
        max[i + 1] = right_limit;//widen right if poss
        changed = TRUE;
      }
    }
  }
  return changed;
}
// Copies the pixels covered by this pixrow out of imlines into clip_image,
// centring the character's bounding box in clip_image and dropping whatever
// does not fit. im_box supplies the x origin of the imlines pixel arrays.
// baseline_pos receives the baseline height relative to clip_image, taken
// from row at the horizontal centre of the character, or 0 if row is NULL.
void PIXROW::char_clip_image(                     //box of imlines extnt
                             IMAGELINE *imlines,
                             TBOX &im_box,
                             ROW *row,            //row containing word
                             IMAGE &clip_image,   //unscaled sq subimage
                             float &baseline_pos  //baseline ht in image
                            ) {
  const inT16 clip_w = clip_image.get_xsize ();   //sub image x size
  const inT16 clip_h = clip_image.get_ysize ();   //sub image y size
  TBOX char_pix_box = bounding_box ();            //bbox of char pixels

  /*
    Vertical shift from pixrow row index to clip image row: the bottom of the
    character relative to the image lines, less the margin needed below the
    character to centre it vertically in the clip image.
  */
  const inT16 y_shift = char_pix_box.bottom () - row_offset -
    (inT16) floor ((clip_h - char_pix_box.height () + 0.5) / 2);

  /*
    Horizontal shift from page x coord to clip image column: the left edge of
    the character, less the margin needed to centre it horizontally.
  */
  const inT16 x_shift = char_pix_box.left () -
    (inT16) floor ((clip_w - char_pix_box.width () + 0.5) / 2);

  for (inT16 y = 0; y < row_count; y++) {
    const inT16 y_dest = y - y_shift;
    // Skip empty rows and rows that land outside the clip image.
    if (min[y] > max[y] || y_dest < 0 || y_dest >= clip_h)
      continue;

    // Trim the source range wherever the destination would overhang.
    inT16 src_left = min[y];
    inT16 dest_left = src_left - x_shift;
    if (dest_left < 0) {
      src_left = src_left - dest_left;
      dest_left = 0;
    }
    inT16 src_right = max[y];
    inT16 dest_right = src_right - x_shift;
    if (dest_right > clip_w - 1) {
      src_right = src_right - (dest_right - (clip_w - 1));
      dest_right = clip_w - 1;
    }
    const inT16 run_width = src_right - src_left + 1;
    if (run_width <= 0)
      continue;

    // Point the image line at the first pixel to copy, copy the run, then
    // reset the pixel pointer.
    src_left -= im_box.left ();
    imlines[y].pixels += src_left;
    clip_image.put_line (dest_left, y_dest, run_width, imlines + y,
      0);
    imlines[y].init ();
  }

  /*
    Baseline position relative to clip image: the baseline at the x coord of
    the centre of the character, made relative to the character bottom, then
    shifted by the margin between the bottom of the character and the bottom
    of the clip image.
  */
  if (row == NULL) {
    baseline_pos = 0;            //Not needed
  }
  else {
    baseline_pos = row->base_line ((char_pix_box.left () +
      char_pix_box.right ()) / 2.0)
      - char_pix_box.bottom ()
      + ((clip_h - char_pix_box.height ()) / 2);
  }
}
Example #10
0
/**
 *  word_display()  Word Processor
 *
 *  Draws the current word of pr_it in image_win according to its display
 *  modes. When a non-rainbow color mode is active and the word has a
 *  box_word, only the per-blob boxes are drawn, in red where the selected
 *  attribute applies and green otherwise. In that mode the function returns
 *  false without drawing if the word has no fontinfo. In every other case it
 *  returns TRUE.
 */
BOOL8 Tesseract::word_display(PAGE_RES_IT* pr_it) {
  WERD_RES* word_res = pr_it->word();
  WERD* word = word_res->word;
  BOOL8 displayed_something = FALSE;

  if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
    BoxWord* box_word = word_res->box_word;
    WERD_CHOICE* best_choice = word_res->best_choice;
    const int length = box_word->length();
    if (word_res->fontinfo == NULL) return false;
    const FontInfo& font_info = *word_res->fontinfo;
    for (int i = 0; i < length; ++i) {
      // Does the attribute selected by color_mode apply to this blob?
      bool highlight = false;
      switch (color_mode) {
        case CM_SUBSCRIPT:
          highlight = best_choice->BlobPosition(i) == SP_SUBSCRIPT;
          break;
        case CM_SUPERSCRIPT:
          highlight = best_choice->BlobPosition(i) == SP_SUPERSCRIPT;
          break;
        case CM_ITALIC:
          highlight = font_info.is_italic();
          break;
        case CM_BOLD:
          highlight = font_info.is_bold();
          break;
        case CM_FIXEDPITCH:
          highlight = font_info.is_fixed_pitch();
          break;
        case CM_SERIF:
          highlight = font_info.is_serif();
          break;
        case CM_SMALLCAPS:
          highlight = word_res->small_caps;
          break;
        case CM_DROPCAPS:
          highlight = best_choice->BlobPosition(i) == SP_DROPCAP;
          break;
          // TODO(rays) underline is currently completely unsupported.
        case CM_UNDERLINE:
        default:
          break;
      }
      image_win->Pen(highlight ? ScrollView::RED : ScrollView::GREEN);
      const TBOX& blob_box = box_word->BlobBox(i);
      image_win->Rectangle(blob_box.left(), blob_box.bottom(),
                           blob_box.right(), blob_box.top());
    }
    return true;
  }

  // The colour settings are coerced via inT32 before the cast to
  // ScrollView::Color to keep the compiler happy.

  // Bounding boxes of the word and of each of its cblobs.
  if (word->display_flag(DF_BOX)) {
    const ScrollView::Color word_color =
        (ScrollView::Color)((inT32) editor_image_word_bb_color);
    word->bounding_box().plot(image_win, word_color, word_color);

    const ScrollView::Color blob_color =
        (ScrollView::Color)((inT32) editor_image_blob_bb_color);
    image_win->Pen(blob_color);
    C_BLOB_IT c_it;
    c_it.set_to_list(word->cblob_list());
    for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())
      c_it.data()->bounding_box().plot(image_win);
    displayed_something = TRUE;
  }

  // Edge steps, in rainbow colors.
  if (word->display_flag(DF_EDGE_STEP)) {
    word->plot(image_win);
    displayed_something = TRUE;
  }

  // Polygonal approximation, which needs a temporary converted copy.
  if (word->display_flag(DF_POLYGONAL)) {
    TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
    tword->plot(image_win);
    delete tword;
    displayed_something = TRUE;
  }

  // Correct text and blamer information.
  STRING text;
  STRING blame;
  if (word->display_flag(DF_TEXT) && word->text() != NULL) {
    text = word->text();
  }
  const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
  // Blamer output replaces the text unless the bundle reports IRR_CORRECT.
  if (word->display_flag(DF_BLAMER) &&
      (blamer_bundle == NULL ||
       blamer_bundle->incorrect_result_reason() != IRR_CORRECT)) {
    if (blamer_bundle == NULL) {
      text = "NULL";
    } else {
      text = blamer_bundle->TruthString();
    }
    text += " -> ";
    STRING best_choice_str;
    if (word_res->best_choice == NULL) {
      best_choice_str = "NULL";
    } else {
      word_res->best_choice->string_and_lengths(&best_choice_str, NULL);
    }
    text += best_choice_str;
    IncorrectResultReason reason = (blamer_bundle == NULL) ?
        IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
    ASSERT_HOST(reason < IRR_NUM_REASONS);
    blame += " [";
    blame += BlamerBundle::IncorrectReasonName(reason);
    blame += "]";
  }
  if (text.length() > 0) {
    const TBOX word_bb = word->bounding_box();
    image_win->Pen(ScrollView::RED);
    const int word_height = word_bb.height();
    // Half the word height, capped at 20.
    int text_height = 0.50 * word_height;
    if (text_height > 20) text_height = 20;
    image_win->TextAttributes("Arial", text_height, false, false, false);
    // Indent from the bottom left only when the word is wider than tall.
    const float shift =
        (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
    const float text_x = word_bb.left() + shift;
    const double text_y = word_bb.bottom() + 0.25 * word_height;
    image_win->Text(text_x, text_y, text.string());
    if (blame.length() > 0) {
      // The blame goes one text height below the text itself.
      image_win->Text(text_x, text_y - text_height, blame.string());
    }

    displayed_something = TRUE;
  }

  // If nothing else was drawn, show the word's bounding box anyway.
  if (!displayed_something) {
    const ScrollView::Color fallback_color =
        (ScrollView::Color)((inT32) editor_image_word_bb_color);
    word->bounding_box().plot(image_win, fallback_color, fallback_color);
  }
  return TRUE;
}
Example #11
0
// Divides srcline about the vertical line x = chop_coord and hands the
// result to left_it and/or right_it.
//   srcline     - outline to split; it is either passed on whole to one of
//                 the iterators or deleted here after a successful chop.
//   chop_coord  - x coordinate of the chop.
//   pitch_error - how far an outline may reach across the chop and still be
//                 kept whole on the side its box is centred on.
//   left_it     - receives left-hand results (add_after_then_move).
//   right_it    - receives right-hand results (add_before_stay_put).
// Children of a chopped outline are sorted by their own boxes, or chopped
// with zero tolerance when they straddle the chop.
void fixed_split_coutline(                        //chop the outline
                          C_OUTLINE *srcline,     //source outline
                          inT16 chop_coord,       //place to chop
                          float pitch_error,      //allowed deviation
                          C_OUTLINE_IT *left_it,  //left half of chop
                          C_OUTLINE_IT *right_it  //right half of chop
                         ) {
  C_OUTLINE_LIST left_children;     // whole children bound for the left
  C_OUTLINE_LIST right_children;    // whole children bound for the right
  C_OUTLINE_FRAG_LIST left_frags;   // chopped fragments, left side
  C_OUTLINE_FRAG_LIST right_frags;  // chopped fragments, right side
  C_OUTLINE_IT left_child_it = &left_children;
  C_OUTLINE_IT right_child_it = &right_children;
  C_OUTLINE_IT child_it = srcline->child();

  TBOX outline_box = srcline->bounding_box();
  // True when the horizontal centre of the box is at or left of the chop.
  const bool centred_left =
      outline_box.left() + outline_box.right() <= chop_coord * 2;

  if (centred_left && outline_box.right() < chop_coord + pitch_error) {
    // Entirely on the left, or not far over the chop: keep it whole there.
    left_it->add_after_then_move(srcline);
    return;
  }
  if (!centred_left && outline_box.left() > chop_coord - pitch_error) {
    // Entirely on the right, or not far over the chop: keep it whole there.
    right_it->add_before_stay_put(srcline);
    return;
  }

  // The outline genuinely straddles the chop, so try to cut it.
  if (!fixed_chop_coutline(srcline, chop_coord, pitch_error,
                           &left_frags, &right_frags)) {
    // The chop failed, so fall back on the side the box centre lies on.
    if (centred_left)
      left_it->add_after_then_move(srcline);
    else
      right_it->add_before_stay_put(srcline);
    return;
  }

  // The parent was chopped; now deal with each of its children in turn.
  for (child_it.mark_cycle_pt(); !child_it.cycled_list();
       child_it.forward()) {
    C_OUTLINE *child = child_it.extract();
    TBOX child_box = child->bounding_box();
    if (child_box.right() < chop_coord) {
      // The child sits wholly on the left.
      left_child_it.add_after_then_move(child);
    } else if (child_box.left() > chop_coord) {
      // The child sits wholly on the right.
      right_child_it.add_after_then_move(child);
    } else if (fixed_chop_coutline(child, chop_coord, 0.0f,
                                   &left_frags, &right_frags)) {
      // Children are chopped without any pitch_error allowance to prevent
      // impossible outlines from being created. Its fragments now stand in
      // for the child.
      delete child;
    } else if (child_box.left() + child_box.right() <= chop_coord * 2) {
      left_child_it.add_after_then_move(child);
    } else {
      right_child_it.add_after_then_move(child);
    }
  }
  close_chopped_cfragments(&left_frags, &left_children, pitch_error, left_it);
  close_chopped_cfragments(&right_frags, &right_children, pitch_error,
                           right_it);
  // Both child lists must be empty after the two calls above.
  ASSERT_HOST(left_children.empty() && right_children.empty());
  delete srcline;  // Smashed up: replaced by whatever was added above.
}
Example #12
0
// Takes a tif/box pair of files and runs recognition on the image, while
// checking that the word bounds tesseract identified roughly match those
// given in the input box file. For each word (an ngram in a single bounding
// box from the box file) whose boxes agree, ambigs_classify_and_output is
// called with the box-file label, the current page iterator and output_file.
//   fname       - image file name; its extension is replaced by ".box" to
//                 find the box file.
//   page_res    - page results to walk.
//   monitor     - not used in this function.
//   output_file - passed through to ambigs_classify_and_output.
    void Tesseract::recog_training_segmented(const STRING &fname,
                                             PAGE_RES *page_res,
                                             volatile ETEXT_DESC *monitor,
                                             FILE *output_file) {
        // Build the box file name by cutting fname at its last '.'.
        STRING box_fname = fname;
        const char *lastdot = strrchr(box_fname.string(), '.');
        if (lastdot != NULL) {
            box_fname[lastdot - box_fname.string()] = '\0';
        }
        box_fname += ".box";
        // The box file is closed below, once the matching loop has finished.
        FILE *box_file = open_file(box_fname.string(), "r");

        PAGE_RES_IT page_res_it;
        page_res_it.page_res = page_res;
        page_res_it.restart_page();
        STRING label;

        // Walk the page words and the box file entries in step.
        TBOX tbox;  // tesseract-identified box
        TBOX bbox;  // box from the box file
        int line_number = 0;
        int examined_words = 0;
        bool keep_going = true;
        while (keep_going) {
            keep_going = read_t(&page_res_it, &tbox);
            // The next box is always read, even when read_t has failed.
            const bool got_box = ReadNextBox(applybox_page, &line_number,
                                             box_file, &label, &bbox);
            keep_going = keep_going && got_box;

            // Advance whichever source is behind until the bottom edges agree.
            while (keep_going &&
                   !NearlyEqual<int>(tbox.bottom(), bbox.bottom(),
                                     kMaxBoxEdgeDiff)) {
                if (tbox.bottom() > bbox.bottom()) {
                    page_res_it.forward();
                    keep_going = read_t(&page_res_it, &tbox);
                } else {
                    keep_going = ReadNextBox(applybox_page, &line_number,
                                             box_file, &label, &bbox);
                }
            }
            // Then do the same for the left edges.
            while (keep_going &&
                   !NearlyEqual<int>(tbox.left(), bbox.left(),
                                     kMaxBoxEdgeDiff)) {
                if (tbox.left() < bbox.left()) {
                    page_res_it.forward();
                    keep_going = read_t(&page_res_it, &tbox);
                } else {
                    keep_going = ReadNextBox(applybox_page, &line_number,
                                             box_file, &label, &bbox);
                }
            }

            // Only OCR the word when the top right corners are also similar.
            if (keep_going) {
                if (NearlyEqual<int>(tbox.right(), bbox.right(),
                                     kMaxBoxEdgeDiff) &&
                    NearlyEqual<int>(tbox.top(), bbox.top(),
                                     kMaxBoxEdgeDiff)) {
                    ambigs_classify_and_output(label.string(), &page_res_it,
                                               output_file);
                    examined_words++;
                }
            }
            page_res_it.forward();
        }
        fclose(box_file);

        // Set up scripts on all of the words that did not get sent to
        // ambigs_classify_and_output.  They all should have, but if all the
        // werd_res's don't get uch_sets, tesseract will crash when you try
        // to iterate over them. :-(
        int total_words = 0;
        for (page_res_it.restart_page(); page_res_it.block() != NULL;
             page_res_it.forward()) {
            if (page_res_it.word() == NULL) {
                continue;
            }
            if (page_res_it.word()->uch_set == NULL) {
                page_res_it.word()->SetupFake(unicharset);
            }
            total_words++;
        }

        // Complain when fewer than 85% of the words were matched to a box.
        if (examined_words < 0.85 * total_words) {
            tprintf("TODO(antonova): clean up recog_training_segmented; "
                            " It examined only a small fraction of the ambigs image.\n");
        }
        tprintf("recog_training_segmented: examined %d / %d words.\n",
                examined_words, total_words);
    }
Example #13
0
/**********************************************************************
 * choose_best_seam
 *
 * Choose the best seam that can be created by assembling a collection
 * of splits.  A queue of all the possible seams is maintained.  Each
 * new split received is wrapped in a seam and placed in that queue
 * with its partial priority value.  Seams are then popped from the
 * queue, given their full priority, and combined with the seams in
 * the pile until a good enough seam is found.  If no further good
 * seams are being found then this function returns to the caller, who
 * will send more splits.  A split of NULL means that the caller has
 * no further splits to supply.
 *
 * The best healthy seam found so far is kept as a copy in
 * *seam_result, replacing (and deleting) any previous one.
 **********************************************************************/
void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
                               PRIORITY priority, SEAM **seam_result,
                               TBLOB *blob, SeamPile *seam_pile) {
  float my_priority = priority;

  /* Wrap the incoming split in a seam and queue it. */
  if (split != NULL) {
    /* The seam is located midway between the two split points. */
    TPOINT split_point = split->point1->pos;
    split_point += split->point2->pos;
    split_point /= 2;
    SEAM *split_seam = new SEAM(my_priority, split_point, *split);
    if (chop_debug > 1) {
      split_seam->Print("Partial priority    ");
    }
    add_seam_to_queue(my_priority, split_seam, seam_queue);

    /* Not promising enough to process yet: wait for more splits. */
    if (my_priority > chop_good_split) {
      return;
    }
  }

  TBOX bbox = blob->bounding_box();
  /* Work through the queued seams. */
  while (!seam_queue->empty()) {
    SeamPair seam_pair;
    seam_queue->Pop(&seam_pair);
    SEAM *seam = seam_pair.extract_data();

    /* Replace the partial priority by the full one. */
    my_priority = seam->FullPriority(bbox.left(), bbox.right(),
                                     chop_overlap_knob, chop_centered_maxwidth,
                                     chop_center_knob, chop_width_change_knob);
    if (chop_debug) {
      char debug_label[80];
      sprintf(debug_label, "Full my_priority %0.0f,  ", my_priority);
      seam->Print(debug_label);
    }

    const bool beats_result =
        *seam_result == NULL || (*seam_result)->priority() > my_priority;
    if (beats_result && my_priority < chop_ok_split) {
      if (!seam->IsHealthy(*blob, chop_min_outline_points,
                           chop_min_outline_area)) {
        /* Unusable seam: drop it and make sure it is not taken as good. */
        delete seam;
        seam = NULL;
        my_priority = BAD_PRIORITY;
      } else {
        /* No crossing: keep a copy as the new best answer. */
        delete *seam_result;
        *seam_result = new SEAM(*seam);
        (*seam_result)->set_priority(my_priority);
      }
    }

    if (my_priority < chop_good_split) {
      delete seam;               /* Deleting NULL is harmless. */
      return;                    /* Made good answer */
    }

    if (seam != NULL) {
      /* Combine with the seams already in the pile. */
      if (seam_pile->size() < chop_seam_pile_size) {
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair new_entry(seam_pair.key(), seam);
        seam_pile->Push(&new_entry);
      } else if (chop_new_seam_pile &&
                 seam_pile->size() == chop_seam_pile_size &&
                 seam_pile->PeekTop().key() > seam_pair.key()) {
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair worst_entry;
        seam_pile->Pop(&worst_entry);  // pop the worst.
        // Reuse the popped entry for the new seam and score (the old seam
        // is deleted by set_data), then push it back into the heap.
        worst_entry.set_key(seam_pair.key());
        worst_entry.set_data(seam);
        seam_pile->Push(&worst_entry);
      } else {
        delete seam;
      }
    }

    /* Look at the priority of the next seam to decide whether to go on. */
    if (seam_queue->empty()) {
      my_priority = NO_FULL_PRIORITY;
    } else {
      my_priority = seam_queue->PeekTop().key();
    }
    if (my_priority > chop_ok_split) {
      return;
    }
    if (my_priority > chop_good_split && split != NULL) {
      return;
    }
  }
}
Example #14
0
// Search vertically for a blob that is aligned with the input bbox.
// The search parameters are determined by AlignedBlobParams.
// top_to_bottom tells whether to search down or up.
// x_start is the x-coordinate of the alignment line at the starting y; the
// expected x at any other y is projected from it along p.vertical.
// The return value is NULL if nothing was found in the search box
// or if a blob was found in the gutter. On a NULL return, end_y
// is set to the edge of the search box or the leading edge of the
// gutter blob if one was found.
// Side effect: when a gutter blob ends the search, the relevant (left or
// right) tab type of bbox is set to TT_DELETED if it was >= TT_UNCONFIRMED.
BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
                                       bool top_to_bottom, BLOBNBOX* bbox,
                                       int x_start, int* end_y) {
  TBOX box = bbox->bounding_box();
  // If there are separator lines, get the column edges.
  // (They are only used in the debug output below.)
  int left_column_edge = bbox->left_rule();
  int right_column_edge = bbox->right_rule();
  // start_y is used to guarantee that forward progress is made and the
  // search does not go into an infinite loop. New blobs must extend the
  // line beyond start_y.
  int start_y = top_to_bottom ? box.bottom() : box.top();
  if (WithinTestRegion(2, x_start, start_y)) {
    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
            box.left(), box.top(), box.right(), box.bottom(),
            left_column_edge, right_column_edge);
  }
  // Compute skew tolerance.
  int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
  // Calculate xmin and xmax of the search box so that it contains
  // all possibly relevant boxes up to p.max_v_gap above or below according
  // to top_to_bottom.
  // Start with a notion of vertical with the current estimate.
  // x2 is the horizontal drift over p.max_v_gap along p.vertical; adding
  // p.vertical.y()/2 before dividing rounds the quotient.
  // NOTE(review): assumes p.vertical.y() is non-zero - confirm with callers.
  int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y();
  if (top_to_bottom) {
    x2 = x_start - x2;
    *end_y = start_y - p.max_v_gap;
  } else {
    x2 = x_start + x2;
    *end_y = start_y + p.max_v_gap;
  }
  // Expand the box by an additional skew tolerance
  int xmin = MIN(x_start, x2) - skew_tolerance;
  int xmax = MAX(x_start, x2) + skew_tolerance;
  // Now add direction-specific tolerances.
  if (p.right_tab) {
    xmax += p.min_gutter;
    xmin -= p.l_align_tolerance;
  } else {
    xmax += p.r_align_tolerance;
    xmin -= p.min_gutter;
  }
  // Setup a vertical search for an aligned blob.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this);
  if (WithinTestRegion(2, x_start, start_y))
    tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n",
            p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left",
            xmin, xmax, start_y, p.max_v_gap, p.min_gutter);
  vsearch.StartVerticalSearch(xmin, xmax, start_y);
  // result stores the best real return value.
  BLOBNBOX* result = NULL;
  // The backup_result is not a tab candidate and can be used if no
  // real tab candidate result is found.
  BLOBNBOX* backup_result = NULL;
  // neighbour is the blob that is currently being investigated.
  BLOBNBOX* neighbour = NULL;
  while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != NULL) {
    if (neighbour == bbox)
      continue;
    TBOX nbox = neighbour->bounding_box();
    // n_y is the vertical centre of the neighbour; all distance tests use it.
    int n_y = (nbox.top() + nbox.bottom()) / 2;
    if ((!top_to_bottom && n_y > start_y + p.max_v_gap) ||
        (top_to_bottom && n_y < start_y - p.max_v_gap)) {
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n",
                nbox.left(), nbox.bottom(), nbox.right(), nbox.top());
      break;  // Gone far enough.
    }
    // It is CRITICAL to ensure that forward progress is made, (strictly
    // in/decreasing n_y) or the caller could loop infinitely, while
    // waiting for a sequence of blobs in a line to end.
    // NextVerticalSearch alone does not guarantee this, as there may be
    // more than one blob in a grid cell. See comment in AlignTabs.
    if ((n_y < start_y) != top_to_bottom || n_y == start_y)
      continue;  // Only look in the required direction.
    // The neighbour's centre is already beyond the far edge of the saved
    // result, so stop here and return what was found.
    if (result != NULL &&
        ((top_to_bottom && n_y < result->bounding_box().bottom()) ||
         (!top_to_bottom && n_y > result->bounding_box().top())))
      return result;  // This result is clear.
    // The same early exit applies to the backup, but only for ragged searches.
    if (backup_result != NULL && p.ragged &&
        ((top_to_bottom && n_y < backup_result->bounding_box().bottom()) ||
         (!top_to_bottom && n_y > backup_result->bounding_box().top())))
      return backup_result;  // This result is clear.

    // If the neighbouring blob is the wrong side of a separator line, then it
    // "doesn't exist" as far as we are concerned.
    // x_at_n_y is where the alignment line through (x_start, start_y) falls
    // at the neighbour's height.
    int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y();
    if (x_at_n_y < neighbour->left_crossing_rule() ||
        x_at_n_y > neighbour->right_crossing_rule())
      continue;  // Separator line in the way.
    int n_left = nbox.left();
    int n_right = nbox.right();
    // n_x is the edge of the neighbour that has to line up: the right edge
    // for a right tab, the left edge otherwise.
    int n_x = p.right_tab ? n_right : n_left;
    if (WithinTestRegion(2, x_start, start_y))
      tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n",
              nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
              n_x, n_y, x_at_n_y);
    // Right tab: the neighbour starts inside the gutter to the right of the
    // line and extends beyond the right alignment tolerance.
    if (p.right_tab &&
        n_left < x_at_n_y + p.min_gutter &&
        n_right > x_at_n_y + p.r_align_tolerance &&
        (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
      // In the gutter so end of line.
      if (bbox->right_tab_type() >= TT_UNCONFIRMED)
        bbox->set_right_tab_type(TT_DELETED);
      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("gutter\n");
      return NULL;
    }
    // Left tab: the mirror image of the test above, with the gutter on the
    // left of the line.
    if (!p.right_tab &&
        n_left < x_at_n_y - p.l_align_tolerance &&
        n_right > x_at_n_y - p.min_gutter &&
        (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
      // In the gutter so end of line.
      if (bbox->left_tab_type() >= TT_UNCONFIRMED)
        bbox->set_left_tab_type(TT_DELETED);
      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("gutter\n");
      return NULL;
    }
    if ((p.right_tab && neighbour->leader_on_right()) ||
        (!p.right_tab && neighbour->leader_on_left()))
      continue;  // Neighbours of leaders are not allowed to be used.
    // The relevant edge lies within the alignment tolerances of the line.
    if (n_x <= x_at_n_y + p.r_align_tolerance &&
        n_x >= x_at_n_y - p.l_align_tolerance) {
      // Aligned so keep it. If it is a marked tab save it as result,
      // otherwise keep it as backup_result to return in case of later failure.
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("aligned, seeking%d, l=%d, r=%d\n",
                p.right_tab, neighbour->left_tab_type(),
                neighbour->right_tab_type());
      if ((p.right_tab && neighbour->right_tab_type() != TT_NONE) ||
          (!p.right_tab && neighbour->left_tab_type() != TT_NONE)) {
        if (result == NULL) {
          result = neighbour;
        } else {
          // Keep the closest neighbour.
          int old_y = (result->bounding_box().top() +
                       result->bounding_box().bottom()) / 2;
          if (abs(n_y - start_y) < abs(old_y - start_y))
            result = neighbour;
        }
      } else if (backup_result == NULL) {
        if (WithinTestRegion(2, x_start, start_y))
          tprintf("Backup\n");
        backup_result = neighbour;
      } else {
        // Replace the backup only by a neighbour whose box reaches further
        // out: further right for a right tab, further left for a left tab.
        TBOX backup_box = backup_result->bounding_box();
        if ((p.right_tab && backup_box.right() < nbox.right()) ||
            (!p.right_tab && backup_box.left() > nbox.left())) {
          if (WithinTestRegion(2, x_start, start_y))
            tprintf("Better backup\n");
          backup_result = neighbour;
        }
      }
    }
  }
  // The search ran out: prefer a real tab candidate over the backup.
  return result != NULL ? result : backup_result;
}
Example #15
0
// TODO(rays) Merge with outline_complexity.
// Recursively counts the outlines nested inside the given outline, looking
// only in the buckets that its bounding box covers. Grandchildren are
// weighted by edges_children_per_grandchild.
// Returns child_count + grandchild_count, which exceeds max_count as soon as
// the limit is passed. Returns max_count + 1 when the parent looks like a box
// and one of its children fails the grandchild, path-length or area tests
// below.
inT32 OL_BUCKETS::count_children(                     // recursive count
                                 C_OUTLINE *outline,  // parent outline
                                 inT32 max_count      // max output
                                ) {
  // Range of buckets touched by the bounding box of the outline.
  TBOX olbox = outline->bounding_box();
  const inT16 xmin = (olbox.left() - bl.x()) / BUCKETSIZE;
  const inT16 xmax = (olbox.right() - bl.x()) / BUCKETSIZE;
  const inT16 ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE;
  const inT16 ymax = (olbox.top() - bl.y()) / BUCKETSIZE;

  inT32 child_count = 0;        // no of children
  inT32 grandchild_count = 0;   // weighted no of grandchildren
  inT32 parent_area = 0;        // |outer area| of outline, computed lazily
  FLOAT32 max_parent_area = 0;  // box area scaled by edges_boxarea
  BOOL8 parent_box = TRUE;      // could the parent still be boxy
  C_OUTLINE_IT child_it;        // search iterator

  for (inT16 yindex = ymin; yindex <= ymax; yindex++) {
    for (inT16 xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
      if (child_it.empty())
        continue;
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        C_OUTLINE *child = child_it.data();
        // Only outlines that compare as inside this one are of interest.
        if (child == outline || !(*child < *outline))
          continue;

        child_count++;
        if (child_count <= max_count) {
          // Spend what is left of the budget on counting grandchildren.
          int max_grand = (max_count - child_count) /
                          edges_children_per_grandchild;
          if (max_grand <= 0)
            grandchild_count += count_children(child, 1);
          else
            grandchild_count += count_children(child, max_grand) *
                                edges_children_per_grandchild;
        }
        if (child_count + grandchild_count > max_count) {
          if (edges_debug)
            tprintf("Discarding parent with child count=%d, gc=%d\n",
                    child_count,grandchild_count);
          return child_count + grandchild_count;
        }

        // Work out the parent areas when the first child turns up (and
        // again for as long as the area comes out as zero).
        if (parent_area == 0) {
          parent_area = outline->outer_area();
          if (parent_area < 0)
            parent_area = -parent_area;
          max_parent_area = outline->bounding_box().area() * edges_boxarea;
          if (parent_area < max_parent_area)
            parent_box = FALSE;
        }

        // The remaining tests only apply to a parent that may be a box,
        // and with edges_children_fix only to children taller than
        // edges_min_nonhole.
        if (!parent_box)
          continue;
        if (edges_children_fix &&
            !(child->bounding_box().height() > edges_min_nonhole))
          continue;

        inT32 child_area = child->outer_area();
        if (child_area < 0)
          child_area = -child_area;
        if (edges_children_fix) {
          if (parent_area - child_area < max_parent_area) {
            parent_box = FALSE;
            continue;
          }
          if (grandchild_count > 0) {
            if (edges_debug)
              tprintf("Discarding parent of area %d, child area=%d, max%g "
                      "with gc=%d\n",
                      parent_area, child_area, max_parent_area,
                      grandchild_count);
            return max_count + 1;
          }
          // Reject when the child's path is long for the area it encloses.
          const inT32 child_length = child->pathlength();
          if (child_length * child_length >
              child_area * edges_patharea_ratio) {
            if (edges_debug)
              tprintf("Discarding parent of area %d, child area=%d, max%g "
                      "with child length=%d\n",
                      parent_area, child_area, max_parent_area,
                      child_length);
            return max_count + 1;
          }
        }
        // Reject when the child fills too little of its own bounding box.
        if (child_area < child->bounding_box().area() * edges_childarea) {
          if (edges_debug)
            tprintf("Discarding parent of area %d, child area=%d, max%g "
                    "with child rect=%d\n",
                    parent_area, child_area, max_parent_area,
                    child->bounding_box().area());
          return max_count + 1;
        }
      }
    }
  }
  return child_count + grandchild_count;
}
Example #16
0
// Recursively measures how complex the given outline is, as the number of
// outlines nested in it plus edges_children_per_grandchild times the
// complexity of each of those. Only the buckets covered by the outline's
// bounding box are searched.
// The result exceeds max_count as soon as any limit is broken: the nesting
// depth (edges_max_children_layers), the number of direct children
// (edges_max_children_per_outline) or the total complexity (max_count).
inT32 OL_BUCKETS::outline_complexity(
                                     C_OUTLINE *outline,   // parent outline
                                     inT32 max_count,      // max output
                                     inT16 depth           // recursion depth
                                    ) {
  // Give up straight away when the nesting has become too deep.
  if (++depth > edges_max_children_layers)
    return max_count + depth;

  // Range of buckets touched by the bounding box of the outline.
  TBOX olbox = outline->bounding_box();
  const inT16 xmin = (olbox.left() - bl.x()) / BUCKETSIZE;
  const inT16 xmax = (olbox.right() - bl.x()) / BUCKETSIZE;
  const inT16 ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE;
  const inT16 ymax = (olbox.top() - bl.y()) / BUCKETSIZE;

  inT32 child_count = 0;       // no of children
  inT32 grandchild_count = 0;  // weighted complexity of the children
  C_OUTLINE_IT child_it;       // search iterator

  for (inT16 yindex = ymin; yindex <= ymax; yindex++) {
    for (inT16 xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
      if (child_it.empty())
        continue;
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        C_OUTLINE *child = child_it.data();
        // Skip the outline itself and anything not inside it.
        if (child == outline || !(*child < *outline))
          continue;

        child_count++;
        if (child_count > edges_max_children_per_outline) {   // too fragmented
          if (edges_debug)
            tprintf("Discard outline on child_count=%d > "
                    "max_children_per_outline=%d\n",
                    child_count,
                    static_cast<inT32>(edges_max_children_per_outline));
          return max_count + child_count;
        }

        // Spend whatever budget remains on the complexity of this child.
        const inT32 remaining_count =
            max_count - child_count - grandchild_count;
        if (remaining_count > 0)
          grandchild_count += edges_children_per_grandchild *
                              outline_complexity(child, remaining_count, depth);

        const inT32 complexity = child_count + grandchild_count;
        if (complexity > max_count) {  // too complex
          if (edges_debug)
            tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
                    "> max_count=%d\n",
                    child_count, grandchild_count, max_count);
          return complexity;
        }
      }
    }
  }
  return child_count + grandchild_count;
}