// Computes the noise_density_ by summing the number of elements in a // neighbourhood of each grid cell. void StrokeWidth::ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid) { // Run a preliminary strokewidth neighbour detection on the medium blobs. line_grid->InsertBlobList(true, true, false, &block->blobs, false, this); BLOBNBOX_IT blob_it(&block->blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { SetNeighbours(false, blob_it.data()); } // Remove blobs with a good strokewidth neighbour from the grid. for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (blob->GoodTextBlob() > 0) RemoveBBox(blob); blob->ClearNeighbours(); } // Insert the smaller blobs into the grid. line_grid->InsertBlobList(true, true, false, &block->small_blobs, false, this); line_grid->InsertBlobList(true, true, false, &block->noise_blobs, false, this); if (noise_density_ != NULL) delete noise_density_; IntGrid* cell_counts = CountCellElements(); noise_density_ = cell_counts->NeighbourhoodSum(); delete cell_counts; // Clear the grid as we don't want the small stuff hanging around in it. Clear(); }
// Computes and returns the noise_density IntGrid, at the same gridsize as // this by summing the number of small elements in a 3x3 neighbourhood of // each grid cell. good_grid is filled with blobs that are considered most // likely good text, and this is filled with small and medium blobs that are // more likely non-text. // The photo_map is used to bias the decision towards non-text, rather than // supplying definite decision. IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid) { IntGrid* noise_counts = CountCellElements(); IntGrid* noise_density = noise_counts->NeighbourhoodSum(); IntGrid* good_counts = good_grid->CountCellElements(); // Now increase noise density in photo areas, to bias the decision and // minimize hallucinated text on image, but trim the noise_density where // there are good blobs and the original count is low in non-photo areas, // indicating that most of the result came from neighbouring cells. int height = pixGetHeight(photo_map); int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); for (int y = 0; y < gridheight(); ++y) { for (int x = 0; x < gridwidth(); ++x) { int noise = noise_density->GridCellValue(x, y); if (max_noise_count_ < noise + photo_offset && noise <= max_noise_count_) { // Test for photo. int left = x * gridsize(); int right = left + gridsize(); int bottom = height - y * gridsize(); int top = bottom - gridsize(); if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, &bottom)) { noise_density->SetGridCell(x, y, noise + photo_offset); } } if (debug && noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0) { tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", x * gridsize(), y * gridsize(), noise_density->GridCellValue(x, y), good_counts->GridCellValue(x, y), noise_counts->GridCellValue(x, y), max_noise_count_); } if (noise > max_noise_count_ && good_counts->GridCellValue(x, y) > 0 && noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= max_noise_count_) { noise_density->SetGridCell(x, y, 0); } } } delete noise_counts; delete good_counts; return noise_density; }