Ejemplo n.º 1
0
/// Helper to compute the dispute resolution metric.
/// Disputed blob resolution. The aim is to give the blob to the most
/// appropriate boxfile box. Most of the time it is obvious, but if
/// two boxfile boxes overlap significantly it is not. If a small boxfile
/// box takes most of the blob, and a large boxfile box does too, then
/// we want the small boxfile box to get it, but if the small box
/// is much smaller than the blob, we don't want it to get it.
/// Details of the disputed blob resolution:
/// Given a box with area A, and a blob with area B, with overlap area C,
/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
/// miss metric gets the blob.
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
  int overlap_area = box1.intersection(box2).area();
  double miss_metric = box1.area()- overlap_area;
  miss_metric /= box1.area();
  miss_metric *= box2.area() - overlap_area;
  miss_metric /= box2.area();
  return miss_metric;
}
Ejemplo n.º 2
0
// Inserts a list of blobs into the projection.
// Rotation is a multiple of 90 degrees to get from blob coords to
// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
// Blobs are spread horizontally or vertically according to their internal
// flags, but the spreading is truncated by set pixels in the nontext_map
// and also by the horizontal rule line limits on the blobs.
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
                                      const FCOORD& rotation,
                                      const TBOX& nontext_map_box,
                                      Pix* nontext_map) {
  BLOBNBOX_IT blob_it(blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    TBOX bbox = blob->bounding_box();
    ICOORD middle((bbox.left() + bbox.right()) / 2,
                  (bbox.bottom() + bbox.top()) / 2);
    bool spreading_horizontally = PadBlobBox(blob, &bbox);
    // Rotate to match the nontext_map.
    bbox.rotate(rotation);
    middle.rotate(rotation);
    if (rotation.x() == 0.0f)
      spreading_horizontally = !spreading_horizontally;
    // Clip to the image before applying the increments.
    bbox &= nontext_map_box;  // This is in-place box intersection.
    // Check for image pixels before spreading.
    TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
                             nontext_map, &bbox);
    if (bbox.area() > 0) {
      IncrementRectangle8Bit(bbox);
    }
  }
}
Ejemplo n.º 3
0
// Tests each blob in the list to see if it is certain non-text using 2
// conditions:
// 1. blob overlaps a cell with high value in noise_density_ (previously set
// by ComputeNoiseDensity).
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
// condition is disabled with max_blob_overlaps == -1.
// If it does, the blob is declared non-text, and is used to mark up the
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
// neighbours reset, as they may now point to deleted data.
// WARNING: The blobs list blobs may be in the *this grid, but they are
// not removed. If any deleted blobs might be in *this, then this must be
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
// blobs are drawn on it in ok_color.
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
        int max_blob_overlaps,
        ScrollView* win,
        ScrollView::Color ok_color,
        Pix* nontext_mask) {
    int imageheight = tright().y() - bleft().x();
    BLOBNBOX_IT blob_it(blobs);
    BLOBNBOX_LIST dead_blobs;
    BLOBNBOX_IT dead_it(&dead_blobs);
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
        BLOBNBOX* blob = blob_it.data();
        TBOX box = blob->bounding_box();
        if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
                (max_blob_overlaps < 0 ||
                 !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
            blob->ClearNeighbours();
#ifndef GRAPHICS_DISABLED
            if (win != NULL)
                blob->plot(win, ok_color, ok_color);
#endif  // GRAPHICS_DISABLED
        } else {
            if (noise_density_->AnyZeroInRect(box)) {
                // There is a danger that the bounding box may overlap real text, so
                // we need to render the outline.
                Pix* blob_pix = blob->cblob()->render_outline();
                pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                            box.width(), box.height(), PIX_SRC | PIX_DST,
                            blob_pix, 0, 0);
                pixDestroy(&blob_pix);
            } else {
                if (box.area() < gridsize() * gridsize()) {
                    // It is a really bad idea to make lots of small components in the
                    // photo mask, so try to join it to a bigger area by expanding the
                    // box in a way that does not touch any zero noise density cell.
                    box = AttemptBoxExpansion(box, *noise_density_, gridsize());
                }
                // All overlapped cells are non-zero, so just mark the rectangle.
                pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                            box.width(), box.height(), PIX_SET, NULL, 0, 0);
            }
#ifndef GRAPHICS_DISABLED
            if (win != NULL)
                blob->plot(win, ScrollView::RED, ScrollView::RED);
#endif  // GRAPHICS_DISABLED
            // It is safe to delete the cblob now, as it isn't used by the grid
            // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
            // dead_blobs list.
            // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
            // the cblob.
            delete blob->cblob();
            dead_it.add_to_end(blob_it.extract());
        }
    }
}
Ejemplo n.º 4
0
// Returns true if more than half the area of the rect is covered by grid
// cells that are over the threshold.
bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
  int min_x, min_y, max_x, max_y;
  GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
  GridCoords(rect.right(), rect.top(), &max_x, &max_y);
  int total_area = 0;
  for (int y = min_y; y <= max_y; ++y) {
    for (int x = min_x; x <= max_x; ++x) {
      int value = GridCellValue(x, y);
      if (value > threshold) {
        TBOX cell_box(x * gridsize_, y * gridsize_,
                      (x + 1) * gridsize_, (y + 1) * gridsize_);
        cell_box &= rect;  // This is in-place box intersection.
        total_area += cell_box.area();
      }
    }
  }
  return total_area * 2 > rect.area();
}