// Computes the noise_density_ by summing the number of elements in a // neighbourhood of each grid cell. void StrokeWidth::ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid) { // Run a preliminary strokewidth neighbour detection on the medium blobs. line_grid->InsertBlobList(true, true, false, &block->blobs, false, this); BLOBNBOX_IT blob_it(&block->blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { SetNeighbours(false, blob_it.data()); } // Remove blobs with a good strokewidth neighbour from the grid. for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (blob->GoodTextBlob() > 0) RemoveBBox(blob); blob->ClearNeighbours(); } // Insert the smaller blobs into the grid. line_grid->InsertBlobList(true, true, false, &block->small_blobs, false, this); line_grid->InsertBlobList(true, true, false, &block->noise_blobs, false, this); if (noise_density_ != NULL) delete noise_density_; IntGrid* cell_counts = CountCellElements(); noise_density_ = cell_counts->NeighbourhoodSum(); delete cell_counts; // Clear the grid as we don't want the small stuff hanging around in it. Clear(); }
// Tests each blob in the list to see if it is certain non-text using 2 // conditions: // 1. blob overlaps a cell with high value in noise_density_ (previously set // by ComputeNoiseDensity). // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This // condition is disabled with max_blob_overlaps == -1. // If it does, the blob is declared non-text, and is used to mark up the // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their // neighbours reset, as they may now point to deleted data. // WARNING: The blobs list blobs may be in the *this grid, but they are // not removed. If any deleted blobs might be in *this, then this must be // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not NULL, deleted blobs are drawn on it in red, and kept // blobs are drawn on it in ok_color. void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, int max_blob_overlaps, ScrollView* win, ScrollView::Color ok_color, Pix* nontext_mask) { int imageheight = tright().y() - bleft().x(); BLOBNBOX_IT blob_it(blobs); BLOBNBOX_LIST dead_blobs; BLOBNBOX_IT dead_it(&dead_blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX box = blob->bounding_box(); if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { blob->ClearNeighbours(); #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ok_color, ok_color); #endif // GRAPHICS_DISABLED } else { if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. Pix* blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, 0); pixDestroy(&blob_pix); } else { if (box.area() < gridsize() * gridsize()) { // It is a really bad idea to make lots of small components in the // photo mask, so try to join it to a bigger area by expanding the // box in a way that does not touch any zero noise density cell. box = AttemptBoxExpansion(box, *noise_density_, gridsize()); } // All overlapped cells are non-zero, so just mark the rectangle. pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SET, NULL, 0, 0); } #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ScrollView::RED, ScrollView::RED); #endif // GRAPHICS_DISABLED // It is safe to delete the cblob now, as it isn't used by the grid // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the // dead_blobs list. // TODO(rays) delete the delete when the BLOBNBOX destructor deletes // the cblob. delete blob->cblob(); dead_it.add_to_end(blob_it.extract()); } } }
// Moves blobs that look like they don't sit well on a textline from the // input blobs list to the output small_blobs list. // This gets them away from initial textline finding to stop diacritics // from forming incorrect textlines. (Introduced mainly to fix Thai.) void TextlineProjection::MoveNonTextlineBlobs( BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const { BLOBNBOX_IT it(blobs); BLOBNBOX_IT small_it(small_blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); const TBOX& box = blob->bounding_box(); bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) { blob->ClearNeighbours(); small_it.add_to_end(it.extract()); } } }