Пример #1
0
BOX* M_Utils::getColPartImCoords(ColPartition* cp, PIX* im) {
  BLOBNBOX_CLIST* blobnboxes = cp->boxes();
  CLIST_ITERATOR bbox_it(blobnboxes);
  l_int32 height = (l_int32)im->h;
  l_int32 left = INT_MAX;
  l_int32 right = INT_MIN;
  l_int32 top = INT_MAX;
  l_int32 bottom = INT_MIN;
  l_int32 l, r, t, b;
  for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
      bbox_it.forward()) {
    BLOBNBOX* blobnbox = (BLOBNBOX*)bbox_it.data();
    l = (l_int32)blobnbox->cblob()->bounding_box().left();
    r = (l_int32)blobnbox->cblob()->bounding_box().right();
    t = height - (l_int32)blobnbox->cblob()->bounding_box().top();
    b = height - (l_int32)blobnbox->cblob()->bounding_box().bottom();
    if(l < left)
      left = l;
    if(r > right)
      right = r;
    if(t < top)
      top = t;
    if(b > bottom)
      bottom = b;
  }
  BOX* boxret = boxCreate(left, top, right-left, bottom-top);
  return boxret;
}
Пример #2
0
// Helper to compute edge offsets for  all the blobs on the list.
// See coutln.h for an explanation of edge offsets.
void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey,
                                  BLOBNBOX_LIST* blobs) {
  int grey_height = 0;
  int thr_height = 0;
  int scale_factor = 1;
  if (thresholds != NULL && grey != NULL) {
    grey_height = pixGetHeight(grey);
    thr_height = pixGetHeight(thresholds);
    scale_factor =
        IntCastRounded(static_cast<double>(grey_height) / thr_height);
  }
  BLOBNBOX_IT blob_it(blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (blob->cblob() != NULL) {
      // Get the threshold that applies to this blob.
      l_uint32 threshold = 128;
      if (thresholds != NULL && grey != NULL) {
        const TBOX& box = blob->cblob()->bounding_box();
        // Transform the coordinates if required.
        TPOINT pt((box.left() + box.right()) / 2,
                  (box.top() + box.bottom()) / 2);
        pixGetPixel(thresholds, pt.x / scale_factor,
                    thr_height - 1 - pt.y / scale_factor, &threshold);
      }
      blob->cblob()->ComputeEdgeOffsets(threshold, grey);
    }
  }
}
Пример #3
0
// Inserts a list of blobs into the projection.
// Rotation is a multiple of 90 degrees to get from blob coords to
// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
// Blobs are spread horizontally or vertically according to their internal
// flags, but the spreading is truncated by set pixels in the nontext_map
// and also by the horizontal rule line limits on the blobs.
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
                                      const FCOORD& rotation,
                                      const TBOX& nontext_map_box,
                                      Pix* nontext_map) {
  BLOBNBOX_IT blob_it(blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    TBOX bbox = blob->bounding_box();
    ICOORD middle((bbox.left() + bbox.right()) / 2,
                  (bbox.bottom() + bbox.top()) / 2);
    bool spreading_horizontally = PadBlobBox(blob, &bbox);
    // Rotate to match the nontext_map.
    bbox.rotate(rotation);
    middle.rotate(rotation);
    if (rotation.x() == 0.0f)
      spreading_horizontally = !spreading_horizontally;
    // Clip to the image before applying the increments.
    bbox &= nontext_map_box;  // This is in-place box intersection.
    // Check for image pixels before spreading.
    TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
                             nontext_map, &bbox);
    if (bbox.area() > 0) {
      IncrementRectangle8Bit(bbox);
    }
  }
}
Пример #4
0
// Computes the noise_density_ by summing the number of elements in a
// neighbourhood of each grid cell.
void StrokeWidth::ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid) {
  // Run a preliminary strokewidth neighbour detection on the medium blobs.
  line_grid->InsertBlobList(true, true, false, &block->blobs, false, this);
  BLOBNBOX_IT blob_it(&block->blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    SetNeighbours(false, blob_it.data());
  }
  // Remove blobs with a good strokewidth neighbour from the grid.
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (blob->GoodTextBlob() > 0)
      RemoveBBox(blob);
    blob->ClearNeighbours();
  }
  // Insert the smaller blobs into the grid.
  line_grid->InsertBlobList(true, true, false, &block->small_blobs,
                            false, this);
  line_grid->InsertBlobList(true, true, false, &block->noise_blobs,
                            false, this);
  if (noise_density_ != NULL)
    delete noise_density_;
  IntGrid* cell_counts = CountCellElements();
  noise_density_ = cell_counts->NeighbourhoodSum();
  delete cell_counts;
  // Clear the grid as we don't want the small stuff hanging around in it.
  Clear();
}
Пример #5
0
// Helper function to divide the input blobs over noise, small, medium
// and large lists. Blobs small in height and (small in width or large in width)
// go in the noise list. Dash (-) candidates go in the small list, and
// medium and large are by height.
// SIDE-EFFECT: reset all blobs to initial state by calling Init().
static void SizeFilterBlobs(int min_height, int max_height,
                            BLOBNBOX_LIST* src_list,
                            BLOBNBOX_LIST* noise_list,
                            BLOBNBOX_LIST* small_list,
                            BLOBNBOX_LIST* medium_list,
                            BLOBNBOX_LIST* large_list) {
  BLOBNBOX_IT noise_it(noise_list);
  BLOBNBOX_IT small_it(small_list);
  BLOBNBOX_IT medium_it(medium_list);
  BLOBNBOX_IT large_it(large_list);
  for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
    BLOBNBOX* blob = src_it.extract();
    blob->ReInit();
    int width = blob->bounding_box().width();
    int height = blob->bounding_box().height();
    if (height < min_height  &&
        (width < min_height || width > max_height))
      noise_it.add_after_then_move(blob);
    else if (height > max_height)
      large_it.add_after_then_move(blob);
    else if (height < min_height)
      small_it.add_after_then_move(blob);
    else
      medium_it.add_after_then_move(blob);
  }
}
Пример #6
0
// Inserts all the blobs from the given list, with x and y spreading,
// without removing from the source list, so ownership remains with the
// source list.
    void BlobGrid::InsertBlobList(BLOBNBOX_LIST * blobs) {
        BLOBNBOX_IT blob_it(blobs);
        for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
            BLOBNBOX *blob = blob_it.data();
            if (!blob->joined_to_prev())
                InsertBBox(true, true, blob);
        }
    }
Пример #7
0
static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
  BLOBNBOX_IT it = boxes;
  // A BLOBNBOX generally doesn't own its blobs, so if they do, you
  // have to delete them explicitly.
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* box = it.data();
    delete box->cblob();
  }
}
Пример #8
0
// Return the partner of this TabVector if the vector qualifies as
// being a vertical text line, otherwise NULL.
TabVector* TabVector::VerticalTextlinePartner() {
  if (!partners_.singleton())
    return NULL;
  TabVector_C_IT partner_it(&partners_);
  TabVector* partner = partner_it.data();
  BLOBNBOX_C_IT box_it1(&boxes_);
  BLOBNBOX_C_IT box_it2(&partner->boxes_);
  // Count how many boxes are also in the other list.
  // At the same time, gather the mean width and median vertical gap.
  if (textord_debug_tabfind > 1) {
    Print("Testing for vertical text");
    partner->Print("           partner");
  }
  int num_matched = 0;
  int num_unmatched = 0;
  int total_widths = 0;
  int width = startpt().x() - partner->startpt().x();
  if (width < 0)
    width = -width;
  STATS gaps(0, width * 2);
  BLOBNBOX* prev_bbox = NULL;
  box_it2.mark_cycle_pt();
  for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
    BLOBNBOX* bbox = box_it1.data();
    TBOX box = bbox->bounding_box();
    if (prev_bbox != NULL) {
      gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
    }
    while (!box_it2.cycled_list() && box_it2.data() != bbox &&
           box_it2.data()->bounding_box().bottom() < box.bottom()) {
      box_it2.forward();
    }
    if (!box_it2.cycled_list() && box_it2.data() == bbox &&
        bbox->region_type() >= BRT_UNKNOWN &&
        (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN))
      ++num_matched;
    else
      ++num_unmatched;
    total_widths += box.width();
    prev_bbox = bbox;
  }
  if (num_unmatched + num_matched == 0) return NULL;
  double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
  int min_box_match = static_cast<int>((num_matched + num_unmatched) *
                                       textord_tabvector_vertical_box_ratio);
  bool is_vertical = (gaps.get_total() > 0 &&
                      num_matched >= min_box_match &&
                      gaps.median() <= max_gap);
  if (textord_debug_tabfind > 1) {
    tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d "
            "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
            gaps.get_total(), num_matched, num_unmatched, min_box_match,
            gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No");
  }
  return (is_vertical) ? partner : NULL;
}
Пример #9
0
// NULLs out any neighbours that are DeletableNoise to remove references.
void BLOBNBOX::CleanNeighbours() {
  for (int dir = 0; dir < BND_COUNT; ++dir) {
    BLOBNBOX* neighbour = neighbours_[dir];
    if (neighbour != NULL && neighbour->DeletableNoise()) {
      neighbours_[dir] = NULL;
      good_stroke_neighbours_[dir] = false;
    }
  }
}
Пример #10
0
// Helper to delete all the deletable blobs on the list.
void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
  BLOBNBOX_IT blob_it(blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (blob->DeletableNoise()) {
      delete blob->cblob();
      delete blob_it.extract();
    }
  }
}
Пример #11
0
// Print basic information about this tab vector and every box in it.
void TabVector::Debug(const char* prefix) {
  Print(prefix);
  BLOBNBOX_C_IT it(&boxes_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* bbox = it.data();
    const TBOX& box = bbox->bounding_box();
    tprintf("Box at (%d,%d)->(%d,%d)\n",
            box.left(), box.bottom(), box.right(), box.top());
  }
}
Пример #12
0
// Returns the number of side neighbours that are of type BRT_NOISE.
int BLOBNBOX::NoisyNeighbours() const {
  int count = 0;
  for (int dir = 0; dir < BND_COUNT; ++dir) {
    BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
    BLOBNBOX* blob = neighbour(bnd);
    if (blob != NULL && blob->region_type() == BRT_NOISE)
      ++count;
  }
  return count;
}
Пример #13
0
// Finds horizontal line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
void LineFinder::FindHorizontalLines(int resolution,  Pix* pix,
                                     TabVector_LIST* vectors) {
#ifdef HAVE_LIBLEPT
  Pix* line_pix;
  Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix);
  C_BLOB_LIST line_cblobs;
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs);
  // Make the BLOBNBOXes from the C_BLOBs.
  BLOBNBOX_LIST line_bblobs;
  C_BLOB_IT blob_it(&line_cblobs);
  BLOBNBOX_IT bbox_it(&line_bblobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    C_BLOB* cblob = blob_it.data();
    BLOBNBOX* bblob = new BLOBNBOX(cblob);
    bbox_it.add_to_end(bblob);
  }
  ICOORD bleft(0, 0);
  ICOORD tright(height, width);
  int vertical_x, vertical_y;
  FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
                  vectors);
  if (!vectors->empty()) {
    // Some lines were found, so erase the unused blobs from the line image
    // and then subtract the line image from the source.
    bbox_it.move_to_first();
    for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
      BLOBNBOX* blob = bbox_it.data();
      if (blob->left_tab_type() == TT_UNCONFIRMED) {
        const TBOX& box = blob->bounding_box();
        // Coords are in tess format so filp x and y and then covert
        // to leptonica by height -y.
        Box* pixbox = boxCreate(box.bottom(), height - box.right(),
                                box.height(), box.width());
        pixClearInRect(line_pix, pixbox);
        boxDestroy(&pixbox);
      }
    }
    pixDilateBrick(line_pix, line_pix, 3, 1);
    pixSubtract(pix, pix, line_pix);
    if (textord_tabfind_show_vlines)
      pixWrite("hlinesclean.png", line_pix, IFF_PNG);
    ICOORD vertical;
    vertical.set_with_shrink(vertical_x, vertical_y);
    TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
    // Iterate the vectors to flip them.
    TabVector_IT h_it(vectors);
    for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
      h_it.data()->XYFlip();
    }
  }
  pixDestroy(&line_pix);
#endif
}
Пример #14
0
// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
// given list in the given body_colour, with child outlines in the
// child_colour.
void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
                              ScrollView::Color body_colour,
                              ScrollView::Color child_colour,
                              ScrollView* win) {
  BLOBNBOX_IT it(list);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    if (blob->DeletableNoise())
      blob->plot(win, body_colour, child_colour);
  }
}
Пример #15
0
// Returns the box gaps between this and its neighbours_ in an array
// indexed by BlobNeighbourDir.
void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
  for (int dir = 0; dir < BND_COUNT; ++dir) {
    gaps[dir] = MAX_INT16;
    BLOBNBOX* neighbour = neighbours_[dir];
    if (neighbour != NULL) {
      TBOX n_box = neighbour->bounding_box();
      if (dir == BND_LEFT || dir == BND_RIGHT) {
        gaps[dir] = box.x_gap(n_box);
      } else {
        gaps[dir] = box.y_gap(n_box);
      }
    }
  }
}
Пример #16
0
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
// are the bounds of the image on which the input line_bblobs were found.
// The input line_bblobs list is const really.
// The output vertical_x and vertical_y are the total of all the vectors.
// The output list of TabVector makes no reference to the input BLOBNBOXes.
void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
                                 BLOBNBOX_LIST* line_bblobs,
                                 int* vertical_x, int* vertical_y,
                                 TabVector_LIST* vectors) {
  BLOBNBOX_IT bbox_it(line_bblobs);
  int b_count = 0;
  // Put all the blobs into the grid to find the lines, and move the blobs
  // to the output lists.
  AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
  for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
    BLOBNBOX* bblob = bbox_it.data();
    bblob->set_left_tab_type(TT_UNCONFIRMED);
    bblob->set_left_rule(bleft.x());
    bblob->set_right_rule(tright.x());
    bblob->set_left_crossing_rule(bleft.x());
    bblob->set_right_crossing_rule(tright.x());
    blob_grid.InsertBBox(false, true, bblob);
    ++b_count;
  }
  if (textord_debug_tabfind)
    tprintf("Inserted %d line blobs into grid\n", b_count);
  if (b_count == 0)
    return;

  // Search the entire grid, looking for vertical line vectors.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid);
  BLOBNBOX* bbox;
  TabVector_IT vector_it(vectors);
  *vertical_x = 0;
  *vertical_y = 1;
  lsearch.StartFullSearch();
  while ((bbox = lsearch.NextFullSearch()) != NULL) {
    if (bbox->left_tab_type() == TT_UNCONFIRMED) {
      const TBOX& box = bbox->bounding_box();
      if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
        tprintf("Finding line vector starting at bbox (%d,%d)\n",
                box.left(), box.bottom());
      AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
      TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
                                                          vertical_x,
                                                          vertical_y);
      if (vector != NULL) {
        vector->Freeze();
        vector_it.add_to_end(vector);
      }
    }
  }
  ScrollView* line_win = NULL;
  if (textord_tabfind_show_vlines) {
    line_win = blob_grid.MakeWindow(0, 50, "Vlines");
    blob_grid.DisplayBoxes(line_win);
    line_win = blob_grid.DisplayTabs("Vlines", line_win);
  }
}
Пример #17
0
// Moves blobs that look like they don't sit well on a textline from the
// input blobs list to the output small_blobs list.
// This gets them away from initial textline finding to stop diacritics
// from forming incorrect textlines. (Introduced mainly to fix Thai.)
void TextlineProjection::MoveNonTextlineBlobs(
    BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
  BLOBNBOX_IT it(blobs);
  BLOBNBOX_IT small_it(small_blobs);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    const TBOX& box = blob->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
                                               box.bottom());
    if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) {
      blob->ClearNeighbours();
      small_it.add_to_end(it.extract());
    }
  }
}
Пример #18
0
// Returns true if other has a similar stroke width to this.
bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other,
                                   double fractional_tolerance,
                                   double constant_tolerance) const {
  // The perimeter-based width is used as a backup in case there is
  // no information in the blob.
  double p_width = area_stroke_width();
  double n_p_width = other.area_stroke_width();
  float h_tolerance = horz_stroke_width_ * fractional_tolerance
                     + constant_tolerance;
  float v_tolerance = vert_stroke_width_ * fractional_tolerance
                     + constant_tolerance;
  double p_tolerance = p_width * fractional_tolerance
                     + constant_tolerance;
  bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
  bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
  bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_,
                                     other.horz_stroke_width_, h_tolerance);
  bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_,
                                     other.vert_stroke_width_, v_tolerance);
  bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
  // For a match, at least one of the horizontal and vertical widths
  // must match, and the other one must either match or be zero.
  // Only if both are zero will we look at the perimeter metric.
  return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
}
Пример #19
0
TBOX box_next_pre_chopped(                 //get bounding box
                         BLOBNBOX_IT *it  //iterator to blobds
                        ) {
  BLOBNBOX *blob;                //current blob
  TBOX result;                    //total box

  blob = it->data ();
  result = blob->bounding_box ();
  do {
    it->forward ();
    blob = it->data ();
  }
                                 //until next real blob
  while (blob->joined_to_prev ());
  return result;
}
Пример #20
0
// Finds vertical line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
void LineFinder::FindVerticalLines(int resolution,  Pix* pix,
                                   int* vertical_x, int* vertical_y,
                                   TabVector_LIST* vectors) {
#ifdef HAVE_LIBLEPT
  Pix* line_pix;
  Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix);
  C_BLOB_LIST line_cblobs;
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs);
  // Make the BLOBNBOXes from the C_BLOBs.
  BLOBNBOX_LIST line_bblobs;
  C_BLOB_IT blob_it(&line_cblobs);
  BLOBNBOX_IT bbox_it(&line_bblobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    C_BLOB* cblob = blob_it.data();
    BLOBNBOX* bblob = new BLOBNBOX(cblob);
    bbox_it.add_to_end(bblob);
  }
  ICOORD bleft(0, 0);
  ICOORD tright(width, height);
  FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
  if (!vectors->empty()) {
    // Some lines were found, so erase the unused blobs from the line image
    // and then subtract the line image from the source.
    bbox_it.move_to_first();
    for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
      BLOBNBOX* blob = bbox_it.data();
      if (blob->left_tab_type() == TT_UNCONFIRMED) {
        const TBOX& box = blob->bounding_box();
        Box* pixbox = boxCreate(box.left(), height - box.top(),
                                box.width(), box.height());
        pixClearInRect(line_pix, pixbox);
        boxDestroy(&pixbox);
      }
    }
    pixDilateBrick(line_pix, line_pix, 1, 3);
    pixSubtract(pix, pix, line_pix);
    if (textord_tabfind_show_vlines)
      pixWrite("vlinesclean.png", line_pix, IFF_PNG);
    ICOORD vertical;
    vertical.set_with_shrink(*vertical_x, *vertical_y);
    TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
  }
  pixDestroy(&line_pix);
#endif
}
Пример #21
0
// Display the blobs in the window colored according to textline quality.
void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
                                         ScrollView* win) {
#ifndef GRAPHICS_DISABLED
  BLOBNBOX_IT it(blobs);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* blob = it.data();
    const TBOX& box = blob->bounding_box();
    bool bad_box = BoxOutOfHTextline(box, NULL, false);
    if (blob->UniquelyVertical())
      win->Pen(ScrollView::YELLOW);
    else
      win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
    win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
  }
  win->Update();
#endif  // GRAPHICS_DISABLED
}
Пример #22
0
// Display the tab codes of the BLOBNBOXes in this grid.
ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
                                     ScrollView* tab_win) {
#ifndef GRAPHICS_DISABLED
  if (tab_win == nullptr)
    tab_win = MakeWindow(0, 50, window_name);
  // For every tab in the grid, display it.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
  gsearch.StartFullSearch();
  BLOBNBOX* bbox;
  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
    const TBOX& box = bbox->bounding_box();
    int left_x = box.left();
    int right_x = box.right();
    int top_y = box.top();
    int bottom_y = box.bottom();
    TabType tabtype = bbox->left_tab_type();
    if (tabtype != TT_NONE) {
      if (tabtype == TT_MAYBE_ALIGNED)
        tab_win->Pen(ScrollView::BLUE);
      else if (tabtype == TT_MAYBE_RAGGED)
        tab_win->Pen(ScrollView::YELLOW);
      else if (tabtype == TT_CONFIRMED)
        tab_win->Pen(ScrollView::GREEN);
      else
        tab_win->Pen(ScrollView::GREY);
      tab_win->Line(left_x, top_y, left_x, bottom_y);
    }
    tabtype = bbox->right_tab_type();
    if (tabtype != TT_NONE) {
      if (tabtype == TT_MAYBE_ALIGNED)
        tab_win->Pen(ScrollView::MAGENTA);
      else if (tabtype == TT_MAYBE_RAGGED)
        tab_win->Pen(ScrollView::ORANGE);
      else if (tabtype == TT_CONFIRMED)
        tab_win->Pen(ScrollView::RED);
      else
        tab_win->Pen(ScrollView::GREY);
      tab_win->Line(right_x, top_y, right_x, bottom_y);
    }
  }
  tab_win->Update();
#endif
  return tab_win;
}
Пример #23
0
// Sets up displacement_modes_ with the top few modes of the perpendicular
// distance of each blob from the given direction vector, after rounding.
void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
  // Set of perpendicular displacements of the blob bottoms from the required
  // baseline direction.
  GenericVector<double> perp_blob_dists;
  displacement_modes_.truncate(0);
  // Gather the skew-corrected position of every blob.
  double min_dist = MAX_FLOAT32;
  double max_dist = -MAX_FLOAT32;
  BLOBNBOX_IT blob_it(blobs_);
  bool debug = false;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    const TBOX& box = blob->bounding_box();
#ifdef kDebugYCoord
    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
#endif
    FCOORD blob_pos((box.left() + box.right()) / 2.0f,
                    blob->baseline_position());
    double offset = direction * blob_pos;
    perp_blob_dists.push_back(offset);
    if (debug) {
      tprintf("Displacement %g for blob at:", offset);
      box.print();
    }
    UpdateRange(offset, &min_dist, &max_dist);
  }
  // Set up a histogram using disp_quant_factor_ as the bucket size.
  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
                   IntCastRounded(max_dist / disp_quant_factor_) + 1);
  for (int i = 0; i < perp_blob_dists.size(); ++i) {
    dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
  }
  GenericVector<KDPairInc<float, int> > scaled_modes;
  dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
  if (debug) {
    for (int i = 0; i < scaled_modes.size(); ++i) {
      tprintf("Top mode = %g * %d\n",
              scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
    }
  }
  for (int i = 0; i < scaled_modes.size(); ++i)
    displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
}
Пример #24
0
// Returns true if the given blob overlaps more than max_overlaps blobs
// in the current grid.
bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
    // Search the grid to see what intersects it.
    // Setup a Rectangle search for overlapping this blob.
    BlobGridSearch rsearch(this);
    TBOX box = blob->bounding_box();
    rsearch.StartRectSearch(box);
    rsearch.SetUniqueMode(true);
    BLOBNBOX* neighbour;
    int overlap_count = 0;
    while (overlap_count <= max_overlaps &&
            (neighbour = rsearch.NextRectSearch()) != NULL) {
        if (box.major_overlap(neighbour->bounding_box())) {
            ++overlap_count;
            if (overlap_count > max_overlaps)
                return true;
        }
    }
    return false;
}
Пример #25
0
//yangjing01 modified : 
bool TAL_make_single_word(bool one_blob, TO_ROW_LIST* rows, ROW_LIST* real_rows)
{
  TO_ROW_IT to_row_it(rows);
  ROW_IT row_it(real_rows);
  //to_real_row is the real row information of single row or single char mode
  TO_ROW* real_to_row = NULL;
  float row_max_height = 0.0;
  for (to_row_it.mark_cycle_pt();
    !to_row_it.cycled_list(); to_row_it.forward()){
    TO_ROW* row = to_row_it.data();
    float row_min_y = row->min_y();
    float row_max_y = row->max_y();
    float row_height = abs(row_max_y - row_min_y);
    if (real_to_row == NULL
      || row_height > row_max_height
      || fabs(row_height - row_max_height) < 1.0f){
      row_max_height = row_height;
      real_to_row = row;
    }
  }

  if (real_to_row == NULL){
    return false;
  }

  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it(&cblobs);
  BLOBNBOX_IT box_it(real_to_row->blob_list());
  for (; !box_it.empty(); box_it.forward()){
    BLOBNBOX* bblob = box_it.extract();
    if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
      if (bblob->cblob() != NULL){
        C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
        cout_it.move_to_last();
        cout_it.add_list_after(bblob->cblob()->out_list());
        delete bblob->cblob();
      }
    }
    else {
      if (bblob->cblob() != NULL)
        cblob_it.add_after_then_move(bblob->cblob());
    }
    delete bblob;
  }
  // Convert the TO_ROW to a ROW.
  ROW* real_row = new ROW(real_to_row, static_cast<inT16>(real_to_row->kern_size),
    static_cast<inT16>(real_to_row->space_size));
  WERD_IT word_it(real_row->word_list());
  WERD* word = new WERD(&cblobs, 0, NULL);
  word->set_flag(W_BOL, TRUE);
  word->set_flag(W_EOL, TRUE);
  word->set_flag(W_DONT_CHOP, one_blob);
  word_it.add_after_then_move(word);
  row_it.add_after_then_move(real_row);

  return true;
}
Пример #26
0
// Extend this vector to include the supplied blob if it doesn't
// already have it.
void TabVector::ExtendToBox(BLOBNBOX* new_blob) {
  TBOX new_box = new_blob->bounding_box();
  BLOBNBOX_C_IT it(&boxes_);
  if (!it.empty()) {
    BLOBNBOX* blob = it.data();
    TBOX box = blob->bounding_box();
    while (!it.at_last() && box.top() <= new_box.top()) {
      if (blob == new_blob)
        return;  // We have it already.
      it.forward();
      blob = it.data();
      box = blob->bounding_box();
    }
    if (box.top() >= new_box.top()) {
      it.add_before_stay_put(new_blob);
      needs_refit_ = true;
      return;
    }
  }
  needs_refit_ = true;
  it.add_after_stay_put(new_blob);
}
Пример #27
0
void TO_ROW::compute_vertical_projection() {  //project whole row
  TBOX row_box;                   //bound of row
  BLOBNBOX *blob;                //current blob
  TBOX blob_box;                  //bounding box
  BLOBNBOX_IT blob_it = blob_list ();

  if (blob_it.empty ())
    return;
  row_box = blob_it.data ()->bounding_box ();
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
    row_box += blob_it.data ()->bounding_box ();

  projection.set_range (row_box.left () - PROJECTION_MARGIN,
    row_box.right () + PROJECTION_MARGIN);
  projection_left = row_box.left () - PROJECTION_MARGIN;
  projection_right = row_box.right () + PROJECTION_MARGIN;
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data();
    if (blob->cblob() != NULL)
      vertical_cblob_projection(blob->cblob(), &projection);
  }
}
Пример #28
0
// Tests each blob in the list to see if it is certain non-text using 2
// conditions:
// 1. blob overlaps a cell with high value in noise_density_ (previously set
// by ComputeNoiseDensity).
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
// condition is disabled with max_blob_overlaps == -1.
// If it does, the blob is declared non-text, and is used to mark up the
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
// neighbours reset, as they may now point to deleted data.
// WARNING: The blobs list blobs may be in the *this grid, but they are
// not removed. If any deleted blobs might be in *this, then this must be
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
// blobs are drawn on it in ok_color.
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
        int max_blob_overlaps,
        ScrollView* win,
        ScrollView::Color ok_color,
        Pix* nontext_mask) {
    int imageheight = tright().y() - bleft().x();
    BLOBNBOX_IT blob_it(blobs);
    BLOBNBOX_LIST dead_blobs;
    BLOBNBOX_IT dead_it(&dead_blobs);
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        TBOX box = blob->bounding_box();
        if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
                (max_blob_overlaps < 0 ||
                 !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
            blob->ClearNeighbours();
#ifndef GRAPHICS_DISABLED
            if (win != NULL)
                blob->plot(win, ok_color, ok_color);
#endif  // GRAPHICS_DISABLED
        } else {
            if (noise_density_->AnyZeroInRect(box)) {
                // There is a danger that the bounding box may overlap real text, so
                // we need to render the outline.
                Pix* blob_pix = blob->cblob()->render_outline();
                pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                            box.width(), box.height(), PIX_SRC | PIX_DST,
                            blob_pix, 0, 0);
                pixDestroy(&blob_pix);
            } else {
                if (box.area() < gridsize() * gridsize()) {
                    // It is a really bad idea to make lots of small components in the
                    // photo mask, so try to join it to a bigger area by expanding the
                    // box in a way that does not touch any zero noise density cell.
                    box = AttemptBoxExpansion(box, *noise_density_, gridsize());
                }
                // All overlapped cells are non-zero, so just mark the rectangle.
                pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                            box.width(), box.height(), PIX_SET, NULL, 0, 0);
            }
#ifndef GRAPHICS_DISABLED
            if (win != NULL)
                blob->plot(win, ScrollView::RED, ScrollView::RED);
#endif  // GRAPHICS_DISABLED
            // It is safe to delete the cblob now, as it isn't used by the grid
            // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
            // dead_blobs list.
            // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
            // the cblob.
            delete blob->cblob();
            dead_it.add_to_end(blob_it.extract());
        }
    }
}
Пример #29
0
BOX box_next(                 //get bounding box
             BLOBNBOX_IT *it  //iterator to blobds
            ) {
  BLOBNBOX *blob;                //current blob
  BOX result;                    //total box

  blob = it->data ();
  result = blob->bounding_box ();
  do {
    it->forward ();
    blob = it->data ();
    if (blob->blob () == NULL && blob->cblob () == NULL)
                                 //was pre-chopped
      result += blob->bounding_box ();
  }
                                 //until next real blob
  while (blob->blob () == NULL && blob->cblob () == NULL || blob->joined_to_prev ());
  return result;
}
Пример #30
0
WERD *make_real_word(BLOBNBOX_IT *box_it,  //iterator
                     int32_t blobcount,      //no of blobs to use
                     bool bol,            //start of line
                     uint8_t blanks          //no of blanks
                    ) {
  C_OUTLINE_IT cout_it;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;
  WERD *word;                    // new word
  BLOBNBOX *bblob;               // current blob
  int32_t blobindex;               // in row

  for (blobindex = 0; blobindex < blobcount; blobindex++) {
    bblob = box_it->extract();
    if (bblob->joined_to_prev()) {
      if (bblob->cblob() != nullptr) {
        cout_it.set_to_list(cblob_it.data()->out_list());
        cout_it.move_to_last();
        cout_it.add_list_after(bblob->cblob()->out_list());
        delete bblob->cblob();
      }
    }
    else {
      if (bblob->cblob() != nullptr)
        cblob_it.add_after_then_move(bblob->cblob());
    }
    delete bblob;
    box_it->forward();          // next one
  }

  if (blanks < 1)
    blanks = 1;

  word = new WERD(&cblobs, blanks, nullptr);

  if (bol)
    word->set_flag(W_BOL, true);
  if (box_it->at_first())
    word->set_flag(W_EOL, true);  // at end of line

  return word;
}