float blob_noise_score(PBLOB *blob) {
  OUTLINE_IT outline_it;
  TBOX box;                      //BB of outline
  inT16 outline_count = 0;
  inT16 max_dimension;
  inT16 largest_outline_dimension = 0;

  outline_it.set_to_list (blob->out_list ());
  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
       outline_it.forward ()) {
    outline_count++;
    box = outline_it.data ()->bounding_box ();
    if (box.height () > box.width ())
      max_dimension = box.height ();
    else
      max_dimension = box.width ();

    if (largest_outline_dimension < max_dimension)
      largest_outline_dimension = max_dimension;
  }

  if (fixsp_noise_score_fixing) {
    if (outline_count > 5)       //penalise LOTS of blobs
      largest_outline_dimension *= 2;

    box = blob->bounding_box ();
    if ((box.bottom () > bln_baseline_offset * 4) ||
        (box.top () < bln_baseline_offset / 2))
      // Halve the score if the blob is unusually high or low.
      largest_outline_dimension /= 2;
  }
  return largest_outline_dimension;
}
float Tesseract::blob_noise_score(TBLOB *blob) {
  TBOX box;  // BB of outline
  inT16 outline_count = 0;
  inT16 max_dimension;
  inT16 largest_outline_dimension = 0;

  for (TESSLINE* ol = blob->outlines; ol != NULL; ol = ol->next) {
    outline_count++;
    box = ol->bounding_box();
    if (box.height() > box.width()) {
      max_dimension = box.height();
    } else {
      max_dimension = box.width();
    }

    if (largest_outline_dimension < max_dimension)
      largest_outline_dimension = max_dimension;
  }

  if (outline_count > 5) {  // penalise LOTS of blobs
    largest_outline_dimension *= 2;
  }

  box = blob->bounding_box();
  if (box.bottom() > kBlnBaselineOffset * 4 ||
      box.top() < kBlnBaselineOffset / 2) {
    // Halve the score if the blob is unusually high or low.
    largest_outline_dimension /= 2;
  }
  return largest_outline_dimension;
}
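// Worked example (not from the original source): the same arithmetic on plain
// integers. The baseline-normalised constant is assumed to be 64 here; treat
// that value, and this helper itself, as illustrative only.
static float NoiseScoreSketch(int largest_outline_dimension, int outline_count,
                              int box_bottom, int box_top) {
  const int kAssumedBlnBaselineOffset = 64;     // assumed value
  float score = largest_outline_dimension;
  if (outline_count > 5)                        // more than 5 outlines: double
    score *= 2;
  if (box_bottom > kAssumedBlnBaselineOffset * 4 ||
      box_top < kAssumedBlnBaselineOffset / 2)  // well above or below: halve
    score /= 2;
  return score;
}
// E.g. NoiseScoreSketch(30, 6, 40, 70) == 60: six outlines double the score,
// and the box stays within the normal baseline band so no halving applies.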
// Tests each blob in the list to see if it is certain non-text using 2
// conditions:
// 1. blob overlaps a cell with high value in noise_density_ (previously set
//    by ComputeNoiseDensity).
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
//    condition is disabled with max_blob_overlaps == -1.
// If either holds, the blob is declared non-text and is used to mark up the
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
// neighbours reset, as they may now point to deleted data.
// WARNING: The blobs in the blobs list may also be stored in *this grid, but
// they are not removed. If any deleted blobs might be in *this, then this
// must be Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
// If win is not NULL, deleted blobs are drawn on it in red, and kept
// blobs are drawn on it in ok_color.
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
                                                int max_blob_overlaps,
                                                ScrollView* win,
                                                ScrollView::Color ok_color,
                                                Pix* nontext_mask) {
  int imageheight = tright().y() - bleft().y();
  BLOBNBOX_IT blob_it(blobs);
  BLOBNBOX_LIST dead_blobs;
  BLOBNBOX_IT dead_it(&dead_blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    TBOX box = blob->bounding_box();
    if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
        (max_blob_overlaps < 0 ||
         !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
      blob->ClearNeighbours();
#ifndef GRAPHICS_DISABLED
      if (win != NULL)
        blob->plot(win, ok_color, ok_color);
#endif  // GRAPHICS_DISABLED
    } else {
      if (noise_density_->AnyZeroInRect(box)) {
        // There is a danger that the bounding box may overlap real text, so
        // we need to render the outline.
        Pix* blob_pix = blob->cblob()->render_outline();
        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                    box.width(), box.height(), PIX_SRC | PIX_DST,
                    blob_pix, 0, 0);
        pixDestroy(&blob_pix);
      } else {
        if (box.area() < gridsize() * gridsize()) {
          // It is a really bad idea to make lots of small components in the
          // photo mask, so try to join it to a bigger area by expanding the
          // box in a way that does not touch any zero noise density cell.
          box = AttemptBoxExpansion(box, *noise_density_, gridsize());
        }
        // All overlapped cells are non-zero, so just mark the rectangle.
        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                    box.width(), box.height(), PIX_SET, NULL, 0, 0);
      }
#ifndef GRAPHICS_DISABLED
      if (win != NULL)
        blob->plot(win, ScrollView::RED, ScrollView::RED);
#endif  // GRAPHICS_DISABLED
      // It is safe to delete the cblob now, as it isn't used by the grid
      // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
      // dead_blobs list.
      // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
      // the cblob.
      delete blob->cblob();
      dead_it.add_to_end(blob_it.extract());
    }
  }
}
PBLOB::PBLOB(                    //constructor
             C_BLOB *cblob,      //compact blob
             float xheight       //height of line
            ) {
  TBOX bbox;                     //bounding box

  if (!cblob->out_list ()->empty ()) {
                                 //get bounding box
    bbox = cblob->bounding_box ();
    if (bbox.height () > xheight)
      xheight = bbox.height ();  //max of line and blob
                                 //copy it
    approximate_outline_list (cblob->out_list (), &outlines, xheight);
  }
}
// Compute the distance from the from_box to the to_box using curved
// projection space. Separation that involves a decrease in projection
// density (moving from the from_box to the to_box) is weighted more heavily
// than constant density, and an increase is weighted less.
// If horizontal_textline is true, then curved space is used vertically,
// as for a diacritic on the edge of a textline.
// The projection uses original image coords, so denorm is used to get
// back to the image coords from box/part space.
// How the calculation works: Think of a diacritic near a textline.
// Distance is measured from the far side of the from_box to the near side of
// the to_box. Shown is the horizontal textline case.
//          |------^-----|
//          | from | box |
//          |------|-----|
//   perpendicular |
//          <------v-------->|--------------------|
//             parallel      |       to box       |
//                           |--------------------|
// Perpendicular distance uses "curved space". See VerticalDistance below.
// Parallel distance is linear.
// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
                                             const TBOX& to_box,
                                             bool horizontal_textline,
                                             const DENORM* denorm,
                                             bool debug) const {
  // The parallel_gap is the horizontal gap between a horizontal textline and
  // the box. Analogous for vertical.
  int parallel_gap = 0;
  // start_pt is the box end of the line to be modified for curved space.
  TPOINT start_pt;
  // end_pt is the partition end of the line to be modified for curved space.
  TPOINT end_pt;
  if (horizontal_textline) {
    parallel_gap = from_box.x_gap(to_box) + from_box.width();
    start_pt.x = (from_box.left() + from_box.right()) / 2;
    end_pt.x = start_pt.x;
    if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
      start_pt.y = from_box.top();
      end_pt.y = MIN(to_box.top(), start_pt.y);
    } else {
      start_pt.y = from_box.bottom();
      end_pt.y = MAX(to_box.bottom(), start_pt.y);
    }
  } else {
    parallel_gap = from_box.y_gap(to_box) + from_box.height();
    if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
      start_pt.x = from_box.right();
      end_pt.x = MIN(to_box.right(), start_pt.x);
    } else {
      start_pt.x = from_box.left();
      end_pt.x = MAX(to_box.left(), start_pt.x);
    }
    start_pt.y = (from_box.bottom() + from_box.top()) / 2;
    end_pt.y = start_pt.y;
  }
  // The perpendicular gap is the max vertical distance gap out of:
  // top of from_box to to_box top and bottom of from_box to to_box bottom.
  // This value is then modified for curved projection space.
  // Analogous for vertical.
  int perpendicular_gap = 0;
  // If start_pt == end_pt, then the from_box lies entirely within the to_box
  // (in the perpendicular direction), so we don't need to calculate the
  // perpendicular_gap.
  if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
    if (denorm != NULL) {
      // Denormalize the start and end.
      denorm->DenormTransform(NULL, start_pt, &start_pt);
      denorm->DenormTransform(NULL, end_pt, &end_pt);
    }
    if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
      perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
                                           end_pt.y);
    } else {
      perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
                                             start_pt.y);
    }
  }
  // The parallel_gap weighs less than the perpendicular_gap.
  return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
}
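// Illustrative sketch (not from the original source): the effect of the final
// combination on plain integers. kParaPerpDistRatio is assumed to be 2 here
// purely for illustration; the real constant is defined elsewhere in the
// source.
static int CombinedGapSketch(int perpendicular_gap, int parallel_gap) {
  const int kAssumedParaPerpDistRatio = 2;  // assumed value for illustration
  return perpendicular_gap + parallel_gap / kAssumedParaPerpDistRatio;
}
// A diacritic 10 units from the textline but 40 units along it scores
// 10 + 40 / 2 = 30, while one 40 units away and 10 along scores
// 40 + 10 / 2 = 45, even though the Euclidean distances are equal: gaps
// perpendicular to the textline are penalised more heavily.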
// Sets up the DENORM to execute a non-linear transformation based on
// preserving an even distribution of stroke edges. The transformation
// operates only within the given box.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
// The second-level vectors must all be sorted in ascending order.
// See comments on the helper functions above for more details.
void DENORM::SetupNonLinear(
    const DENORM* predecessor, const TBOX& box, float target_width,
    float target_height, float final_xshift, float final_yshift,
    const GenericVector<GenericVector<int> >& x_coords,
    const GenericVector<GenericVector<int> >& y_coords) {
  Clear();
  predecessor_ = predecessor;
  // x_map_ and y_map_ store a mapping from input x and y coordinate to output
  // x and y coordinate, based on scaling to the supplied target_width and
  // target_height.
  x_map_ = new GenericVector<float>;
  y_map_ = new GenericVector<float>;
  // Set a 2-d image array to the run lengths at each pixel.
  int width = box.width();
  int height = box.height();
  GENERIC_2D_ARRAY<int> minruns(width, height, 0);
  ComputeRunlengthImage(box, x_coords, y_coords, &minruns);
  // Edge density is the sum of the inverses of the run lengths. Compute
  // edge density projection profiles.
  ComputeEdgeDensityProfiles(box, minruns, x_map_, y_map_);
  // Convert the edge density profiles to the coordinates by multiplying by
  // the desired size and accumulating.
  (*x_map_)[width] = target_width;
  for (int x = width - 1; x >= 0; --x) {
    (*x_map_)[x] = (*x_map_)[x + 1] - (*x_map_)[x] * target_width;
  }
  (*y_map_)[height] = target_height;
  for (int y = height - 1; y >= 0; --y) {
    (*y_map_)[y] = (*y_map_)[y + 1] - (*y_map_)[y] * target_height;
  }
  x_origin_ = box.left();
  y_origin_ = box.bottom();
  final_xshift_ = final_xshift;
  final_yshift_ = final_yshift;
}
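// Illustrative sketch (not from the original source): the back-accumulation
// loops above turn a density profile that sums to 1 into a monotonic
// coordinate map ending at the target size. For a profile of
// {0.1, 0.4, 0.4, 0.1} and target_width = 10 the map becomes
// {0, 1, 5, 9, 10} (up to float rounding): dense columns are stretched,
// sparse columns are compressed.
#include <vector>

static std::vector<float> ProfileToMapSketch(std::vector<float> profile,
                                             float target_width) {
  std::vector<float> map = profile;  // map[i] initially holds the density of i
  map.push_back(target_width);       // extra element maps the far edge
  for (int x = static_cast<int>(profile.size()) - 1; x >= 0; --x)
    map[x] = map[x + 1] - map[x] * target_width;
  return map;
}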
// Converts the run-length image (see above) to the edge density profiles used
// for scaling, thus:
//  ______________
// |7 1_1_1_1_1 7|  = 5.28
// |1|5 5 1 5 5|1|  = 3.8
// |1|2 2|1|2 2|1|  = 5
// |1|2 2|1|2 2|1|  = 5
// |1|2 2|1|2 2|1|  = 5
// |1|2 2|1|2 2|1|  = 5
// |1|5_5_1_5_5|1|  = 3.8
// |7_1_1_1_1_1_7|  = 5.28
//  6 4 4 8 4 4 6
//  . . . . . . .
//  2 4 4 0 4 4 2
//  8           8
// Each profile is the sum of the reciprocals of the pixels in the image in
// the appropriate row or column, and these are then normalized to sum to 1.
// On output hx, hy contain an extra element, which will eventually be used
// to guarantee that the top/right edge of the box (and anything beyond) always
// gets mapped to the maximum target coordinate.
static void ComputeEdgeDensityProfiles(const TBOX& box,
                                       const GENERIC_2D_ARRAY<int>& minruns,
                                       GenericVector<float>* hx,
                                       GenericVector<float>* hy) {
  int width = box.width();
  int height = box.height();
  hx->init_to_size(width + 1, 0.0);
  hy->init_to_size(height + 1, 0.0);
  double total = 0.0;
  for (int iy = 0; iy < height; ++iy) {
    for (int ix = 0; ix < width; ++ix) {
      int run = minruns(ix, iy);
      if (run == 0) run = 1;
      float density = 1.0f / run;
      (*hx)[ix] += density;
      (*hy)[iy] += density;
    }
    total += (*hy)[iy];
  }
  // Normalize each profile to sum to 1.
  if (total > 0.0) {
    for (int ix = 0; ix < width; ++ix) {
      (*hx)[ix] /= total;
    }
    for (int iy = 0; iy < height; ++iy) {
      (*hy)[iy] /= total;
    }
  }
  // There is an extra element in each array, so initialize to 1.
  (*hx)[width] = 1.0f;
  (*hy)[height] = 1.0f;
}
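// Illustrative sketch (not from the original source): the raw (un-normalized)
// density of a single row of the run-length image above. For the top row
// {7, 1, 1, 1, 1, 1, 7} it returns 1/7 + 5 + 1/7 = 5.28..., the value shown
// beside that row in the diagram before normalization.
static float RowDensitySketch(const int* runs, int n) {
  float density = 0.0f;
  for (int i = 0; i < n; ++i)
    density += 1.0f / (runs[i] == 0 ? 1 : runs[i]);  // zero runs count as 1
  return density;
}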
// Adds edges to the given vectors.
// For all the edge steps in all the outlines, or polygonal approximation
// where there are no edge steps, collects the steps into x_coords/y_coords.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void TBLOB::GetEdgeCoords(const TBOX& box,
                          GenericVector<GenericVector<int> >* x_coords,
                          GenericVector<GenericVector<int> >* y_coords) const {
  GenericVector<int> empty;
  x_coords->init_to_size(box.height(), empty);
  y_coords->init_to_size(box.width(), empty);
  CollectEdges(box, nullptr, nullptr, x_coords, y_coords);
  // Sort the output vectors.
  for (int i = 0; i < x_coords->size(); ++i)
    (*x_coords)[i].sort();
  for (int i = 0; i < y_coords->size(); ++i)
    (*y_coords)[i].sort();
}
/**********************************************************************
 * char_box_to_tbox
 *
 * Create a TBOX from a character bounding box. If nonzero, the
 * x_offset accounts for any additional padding of the word box that
 * should be taken into account.
 *
 **********************************************************************/
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
  l_int32 left;
  l_int32 top;
  l_int32 width;
  l_int32 height;
  l_int32 right;
  l_int32 bottom;

  boxGetGeometry(char_box, &left, &top, &width, &height);
  left += word_box.left() - x_offset;
  right = left + width;
  top = word_box.bottom() + word_box.height() - top;
  bottom = top - height;
  return TBOX(left, bottom, right, top);
}
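// Illustrative sketch (not from the original source): the same coordinate
// conversion on plain integers. Leptonica boxes use a top-left origin with y
// growing downwards and char_box measured from the top of the word box;
// TBOX uses a bottom-left origin with y growing upwards. For example, with
// word_bottom = 100, word_height = 40, char_top = 5 and char_height = 20,
// the result is top = 100 + 40 - 5 = 135 and bottom = 135 - 20 = 115.
static void CharBoxToTboxSketch(int word_left, int word_bottom,
                                int word_height, int x_offset,
                                int char_left, int char_top,
                                int char_width, int char_height,
                                int* left, int* bottom, int* right, int* top) {
  *left = char_left + word_left - x_offset;
  *right = *left + char_width;
  *top = word_bottom + word_height - char_top;
  *bottom = *top - char_height;
}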
// Helper for SetupNonLinear computes an image of shortest run-lengths from
// the x/y edges provided.
// Based on "A nonlinear normalization method for handprinted Kanji character
// recognition -- line density equalization" by Hiromitsu Yamada et al.
// Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding
//  ______________           input x_coords and y_coords.
// |  _________  |           <empty>
// | |    _    | |           1, 6
// | |   | |   | |           1, 3, 4, 6
// | |   | |   | |           1, 3, 4, 6
// | |   | |   | |           1, 3, 4, 6
// | |   |_|   | |           1, 3, 4, 6
// | |_________| |           1, 6
// |_____________|           <empty>
//  E 1 1 1 1 1 E
//  m 7 7 2 7 7 m
//  p     6     p
//  t     7     t
//  y           y
// The output image contains the min of the x and y run-length (distance
// between edges) at each coordinate in the image thus:
//  ______________
// |7 1_1_1_1_1 7|
// |1|5 5 1 5 5|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|2 2|1|2 2|1|
// |1|5_5_1_5_5|1|
// |7_1_1_1_1_1_7|
// Note that the input coords are all integer, so all partial pixels are dealt
// with elsewhere. Although it is nice for outlines to be properly connected
// and continuous, there is no requirement that they be as such, so they could
// have been derived from a flaky source, such as greyscale.
// This function works only within the provided box, and it is assumed that the
// input x_coords and y_coords have already been translated to have the bottom-
// left of box as the origin. Although an output, the minruns should have been
// pre-initialized to be the same size as box. Each element will contain the
// minimum of x and y run-length as shown above.
static void ComputeRunlengthImage(
    const TBOX& box,
    const GenericVector<GenericVector<int> >& x_coords,
    const GenericVector<GenericVector<int> >& y_coords,
    GENERIC_2D_ARRAY<int>* minruns) {
  int width = box.width();
  int height = box.height();
  ASSERT_HOST(minruns->dim1() == width);
  ASSERT_HOST(minruns->dim2() == height);
  // Set a 2-d image array to the run lengths at each pixel.
  for (int ix = 0; ix < width; ++ix) {
    int y = 0;
    for (int i = 0; i < y_coords[ix].size(); ++i) {
      int y_edge = ClipToRange(y_coords[ix][i], 0, height);
      int gap = y_edge - y;
      // Every pixel between the last and current edge gets set to the gap.
      while (y < y_edge) {
        (*minruns)(ix, y) = gap;
        ++y;
      }
    }
    // Pretend there is a bounding box of edges all around the image.
    int gap = height - y;
    while (y < height) {
      (*minruns)(ix, y) = gap;
      ++y;
    }
  }
  // Now set the image pixels to the MIN of the x and y runlengths.
  for (int iy = 0; iy < height; ++iy) {
    int x = 0;
    for (int i = 0; i < x_coords[iy].size(); ++i) {
      int x_edge = ClipToRange(x_coords[iy][i], 0, width);
      int gap = x_edge - x;
      while (x < x_edge) {
        if (gap < (*minruns)(x, iy))
          (*minruns)(x, iy) = gap;
        ++x;
      }
    }
    int gap = width - x;
    while (x < width) {
      if (gap < (*minruns)(x, iy))
        (*minruns)(x, iy) = gap;
      ++x;
    }
  }
}
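// Illustrative sketch (not from the original source): the per-column pass of
// the function above, isolated for a single column. With height = 8 and
// horizontal edges at y = 1 and y = 7 (one of the outer-ring columns of the
// 'O' example), every pixel between consecutive edges is set to the gap
// between them, and the image border acts as a final edge, giving
// {1, 6, 6, 6, 6, 6, 6, 1}. The second pass then takes the MIN of this with
// the row-wise run lengths.
#include <vector>

static std::vector<int> ColumnRunsSketch(const std::vector<int>& y_edges,
                                         int height) {
  std::vector<int> runs(height, 0);
  int y = 0;
  for (size_t i = 0; i < y_edges.size(); ++i) {
    int gap = y_edges[i] - y;
    while (y < y_edges[i]) runs[y++] = gap;
  }
  int gap = height - y;  // pretend there is an edge at the image border
  while (y < height) runs[y++] = gap;
  return runs;
}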
// Given an input pix, and a box, the sides of the box are shrunk inwards until
// they bound any black pixels found within the original box.
// The function converts between tesseract coords and the pix coords assuming
// that this pix is full resolution equal in size to the original image.
// Returns an empty box if there are no black pixels in the source box.
static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
  int im_height = pixGetHeight(pix);
  Box* input_box = boxCreate(box.left(), im_height - box.top(),
                             box.width(), box.height());
  Box* output_box = NULL;
  pixClipBoxToForeground(pix, input_box, NULL, &output_box);
  TBOX result_box;
  if (output_box != NULL) {
    l_int32 x, y, width, height;
    boxGetGeometry(output_box, &x, &y, &width, &height);
    result_box.set_left(x);
    result_box.set_right(x + width);
    result_box.set_top(im_height - y);
    result_box.set_bottom(result_box.top() - height);
    boxDestroy(&output_box);
  }
  boxDestroy(&input_box);
  return result_box;
}
BOOL8 suspect_fullstop(WERD_RES *word, inT16 i) {
  float aspect_ratio;
  PBLOB_LIST *blobs = word->outword->blob_list ();
  PBLOB_IT blob_it(blobs);
  inT16 j;
  TBOX box;
  inT16 width;
  inT16 height;

  for (j = 0; j < i; j++)
    blob_it.forward ();
  box = blob_it.data ()->bounding_box ();
  width = box.width ();
  height = box.height ();
  aspect_ratio = ((width > height) ? ((float) width) / height
                                   : ((float) height) / width);
  return (aspect_ratio > tessed_fullstop_aspect_ratio);
}
/**********************************************************************
 * make_rotated_tess_blob
 *
 * Make a single Tess style blob, applying the given rotation and
 * renormalizing.
 **********************************************************************/
TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
                              BOOL8 flatten) {
  if (denorm != NULL && denorm->block() != NULL &&
      denorm->block()->classify_rotation().y() != 0.0) {
    TBOX box = blob->bounding_box();
    int src_width = box.width();
    int src_height = box.height();
    src_width = static_cast<int>(src_width / denorm->scale() + 0.5);
    src_height = static_cast<int>(src_height / denorm->scale() + 0.5);
    int x_middle = (box.left() + box.right()) / 2;
    int y_middle = (box.top() + box.bottom()) / 2;
    PBLOB* rotated_blob = PBLOB::deep_copy(blob);
    rotated_blob->move(FCOORD(-x_middle, -y_middle));
    rotated_blob->rotate(denorm->block()->classify_rotation());
    ICOORD median_size = denorm->block()->median_size();
    int tolerance = median_size.x() / 8;
    // TODO(dsl/rays) find a better normalization solution. In the mean time
    // make it work for CJK by normalizing for Cap height in the same way
    // as is applied in compute_block_xheight when the row is presumed to
    // be ALLCAPS, i.e. the x-height is the fixed fraction
    // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
    if (NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
        NearlyEqual(src_height, static_cast<int>(median_size.y()),
                    tolerance)) {
      float target_height = bln_x_height *
          (textord_merge_x + textord_merge_asc) / textord_merge_x;
      rotated_blob->scale(target_height / box.width());
      rotated_blob->move(FCOORD(0.0f,
                                bln_baseline_offset -
                                    rotated_blob->bounding_box().bottom()));
    }
    TBLOB* result = make_tess_blob(rotated_blob, flatten);
    delete rotated_blob;
    return result;
  } else {
    return make_tess_blob(blob, flatten);
  }
}
// Fixes the block so it obeys all the rules:
// Must have at least one ROW.
// Must have at least one WERD.
// WERDs contain a fake blob.
void Textord::cleanup_nontext_block(BLOCK* block) {
  // Non-text blocks must contain at least one row.
  ROW_IT row_it(block->row_list());
  if (row_it.empty()) {
    TBOX box = block->bounding_box();
    float height = box.height();
    inT32 xstarts[2] = {box.left(), box.right()};
    double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())};
    ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f,
                       height / 4.0f, 0, 1);
    row_it.add_after_then_move(row);
  }
  // Each row must contain at least one word.
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    ROW* row = row_it.data();
    WERD_IT w_it(row->word_list());
    if (w_it.empty()) {
      // Make a fake blob to put in the word.
      TBOX box = block->row_list()->singleton() ? block->bounding_box()
                                                : row->bounding_box();
      C_BLOB* blob = C_BLOB::FakeBlob(box);
      C_BLOB_LIST blobs;
      C_BLOB_IT blob_it(&blobs);
      blob_it.add_after_then_move(blob);
      WERD* word = new WERD(&blobs, 0, NULL);
      w_it.add_after_then_move(word);
    }
    // Each word must contain a fake blob.
    for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
      WERD* word = w_it.data();
      // Just assert that this is true, as it would be useful to find
      // out why it isn't.
      ASSERT_HOST(!word->cblob_list()->empty());
    }
    row->recalc_bounding_box();
  }
}
// Returns true if the blob is small enough to be a large speckle.
bool Classify::LargeSpeckle(const TBLOB &blob) {
  double speckle_size = kBlnXHeight * speckle_large_max_size;
  TBOX bbox = blob.bounding_box();
  return bbox.width() < speckle_size && bbox.height() < speckle_size;
}
/**
 * This routine returns TRUE if both the width and height
 * of Blob are less than the MaxLargeSpeckleSize.
 *
 * Globals:
 * - #speckle_large_max_size largest allowed speckle
 *
 * Exceptions: none
 * History: Mon Mar 11 10:06:49 1991, DSJ, Created.
 *
 * @param blob blob to test against speckle criteria
 *
 * @return TRUE if blob is speckle, FALSE otherwise.
 */
BOOL8 LargeSpeckle(TBLOB *blob) {
  double speckle_size = BASELINE_SCALE * speckle_large_max_size;
  TBOX bbox = blob->bounding_box();
  return (bbox.width() < speckle_size && bbox.height() < speckle_size);
}                                /* LargeSpeckle */
void char_clip_word(                        //
                    WERD *word,             //word to be processed
                    IMAGE &bin_image,       //whole image
                    PIXROW_LIST *&pixrow_list,  //pixrows built
                    IMAGELINE *&imlines,    //lines cut from image
                    TBOX &pix_box           //box defining imlines
                   ) {
  TBOX word_box = word->bounding_box ();
  PBLOB_LIST *blob_list;
  PBLOB_IT blob_it;
  PIXROW_IT pixrow_it;
  inT16 pix_offset;              //Y pos of pixrow[0]
  inT16 row_height;              //No of pix rows
  inT16 imlines_x_offset;
  PIXROW *prev;
  PIXROW *next;
  PIXROW *current;
  BOOL8 changed;                 //still improving
  BOOL8 just_changed;            //still improving
  inT16 iteration_count = 0;
  inT16 foreground_colour;

  if (word->flag (W_INVERSE))
    foreground_colour = 1;
  else
    foreground_colour = 0;

  /* Define region for max pixrow expansion */
  pix_box = word_box;
  pix_box.move_bottom_edge (-pix_word_margin);
  pix_box.move_top_edge (pix_word_margin);
  pix_box.move_left_edge (-pix_word_margin);
  pix_box.move_right_edge (pix_word_margin);
  pix_box -= TBOX (ICOORD (0, 0 + BUG_OFFSET),
                   ICOORD (bin_image.get_xsize (),
                           bin_image.get_ysize () - BUG_OFFSET));

  /* Generate pixrows list */
  pix_offset = pix_box.bottom ();
  row_height = pix_box.height ();
  blob_list = word->blob_list ();
  blob_it.set_to_list (blob_list);

  pixrow_list = new PIXROW_LIST;
  pixrow_it.set_to_list (pixrow_list);

  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    PIXROW *row = new PIXROW (pix_offset, row_height, blob_it.data ());
    ASSERT_HOST (!row->bad_box (bin_image.get_xsize (),
                                bin_image.get_ysize ()));
    pixrow_it.add_after_then_move (row);
  }

  imlines = generate_imlines (bin_image, pix_box);

  /* Contract pixrows - shrink min and max back to black pixels */
  imlines_x_offset = pix_box.left ();

  pixrow_it.move_to_first ();
  for (pixrow_it.mark_cycle_pt (); !pixrow_it.cycled_list ();
       pixrow_it.forward ()) {
    ASSERT_HOST (!pixrow_it.data ()->bad_box (bin_image.get_xsize (),
                                              bin_image.get_ysize ()));
    pixrow_it.data ()->contract (imlines, imlines_x_offset,
                                 foreground_colour);
    ASSERT_HOST (!pixrow_it.data ()->bad_box (bin_image.get_xsize (),
                                              bin_image.get_ysize ()));
  }

  /* Expand pixrows iteratively 1 pixel at a time */
  do {
    changed = FALSE;
    pixrow_it.move_to_first ();
    prev = NULL;
    current = NULL;
    next = pixrow_it.data ();
    for (pixrow_it.mark_cycle_pt (); !pixrow_it.cycled_list ();
         pixrow_it.forward ()) {
      prev = current;
      current = next;
      if (pixrow_it.at_last ())
        next = NULL;
      else
        next = pixrow_it.data_relative (1);
      just_changed = current->extend (imlines, pix_box, prev, next,
                                      foreground_colour);
      ASSERT_HOST (!current->bad_box (bin_image.get_xsize (),
                                      bin_image.get_ysize ()));
      changed = changed || just_changed;
    }
    iteration_count++;
  }
  while (changed);
}
// Search vertically for a blob that is aligned with the input bbox.
// The search parameters are determined by AlignedBlobParams.
// top_to_bottom tells whether to search down or up.
// The return value is nullptr if nothing was found in the search box
// or if a blob was found in the gutter. On a nullptr return, end_y
// is set to the edge of the search box or the leading edge of the
// gutter blob if one was found.
BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
                                       bool top_to_bottom, BLOBNBOX* bbox,
                                       int x_start, int* end_y) {
  TBOX box = bbox->bounding_box();
  // If there are separator lines, get the column edges.
  int left_column_edge = bbox->left_rule();
  int right_column_edge = bbox->right_rule();
  // start_y is used to guarantee that forward progress is made and the
  // search does not go into an infinite loop. New blobs must extend the
  // line beyond start_y.
  int start_y = top_to_bottom ? box.bottom() : box.top();
  if (WithinTestRegion(2, x_start, start_y)) {
    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
            box.left(), box.top(), box.right(), box.bottom(),
            left_column_edge, right_column_edge);
  }
  // Compute skew tolerance.
  int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
  // Calculate xmin and xmax of the search box so that it contains
  // all possibly relevant boxes up to p.max_v_gap above or below according
  // to top_to_bottom.
  // Start with a notion of vertical with the current estimate.
  int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y() / 2) /
           p.vertical.y();
  if (top_to_bottom) {
    x2 = x_start - x2;
    *end_y = start_y - p.max_v_gap;
  } else {
    x2 = x_start + x2;
    *end_y = start_y + p.max_v_gap;
  }
  // Expand the box by an additional skew tolerance.
  int xmin = std::min(x_start, x2) - skew_tolerance;
  int xmax = std::max(x_start, x2) + skew_tolerance;
  // Now add direction-specific tolerances.
  if (p.right_tab) {
    xmax += p.min_gutter;
    xmin -= p.l_align_tolerance;
  } else {
    xmax += p.r_align_tolerance;
    xmin -= p.min_gutter;
  }
  // Set up a vertical search for an aligned blob.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this);
  if (WithinTestRegion(2, x_start, start_y))
    tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n",
            p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left",
            xmin, xmax, start_y, p.max_v_gap, p.min_gutter);
  vsearch.StartVerticalSearch(xmin, xmax, start_y);
  // result stores the best real return value.
  BLOBNBOX* result = nullptr;
  // The backup_result is not a tab candidate and can be used if no
  // real tab candidate result is found.
  BLOBNBOX* backup_result = nullptr;
  // neighbour is the blob that is currently being investigated.
  BLOBNBOX* neighbour = nullptr;
  while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) {
    if (neighbour == bbox)
      continue;
    TBOX nbox = neighbour->bounding_box();
    int n_y = (nbox.top() + nbox.bottom()) / 2;
    if ((!top_to_bottom && n_y > start_y + p.max_v_gap) ||
        (top_to_bottom && n_y < start_y - p.max_v_gap)) {
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n",
                nbox.left(), nbox.bottom(), nbox.right(), nbox.top());
      break;  // Gone far enough.
    }
    // It is CRITICAL to ensure that forward progress is made, (strictly
    // in/decreasing n_y) or the caller could loop infinitely, while
    // waiting for a sequence of blobs in a line to end.
    // NextVerticalSearch alone does not guarantee this, as there may be
    // more than one blob in a grid cell. See comment in AlignTabs.
    if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box))
      continue;  // Only look in the required direction.
    if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize())
      return result;  // This result is clear.
    if (backup_result != nullptr && p.ragged && result == nullptr &&
        backup_result->bounding_box().y_gap(nbox) > gridsize())
      return backup_result;  // This result is clear.

    // If the neighbouring blob is the wrong side of a separator line, then it
    // "doesn't exist" as far as we are concerned.
    int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() /
                   p.vertical.y();
    if (x_at_n_y < neighbour->left_crossing_rule() ||
        x_at_n_y > neighbour->right_crossing_rule())
      continue;  // Separator line in the way.
    int n_left = nbox.left();
    int n_right = nbox.right();
    int n_x = p.right_tab ? n_right : n_left;
    if (WithinTestRegion(2, x_start, start_y))
      tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n",
              nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
              n_x, n_y, x_at_n_y);
    if (p.right_tab &&
        n_left < x_at_n_y + p.min_gutter &&
        n_right > x_at_n_y + p.r_align_tolerance &&
        (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
      // In the gutter so end of line.
      if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED)
        bbox->set_right_tab_type(TT_DELETED);
      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("gutter\n");
      return nullptr;
    }
    if (!p.right_tab &&
        n_left < x_at_n_y - p.l_align_tolerance &&
        n_right > x_at_n_y - p.min_gutter &&
        (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
      // In the gutter so end of line.
      if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED)
        bbox->set_left_tab_type(TT_DELETED);
      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("gutter\n");
      return nullptr;
    }
    if ((p.right_tab && neighbour->leader_on_right()) ||
        (!p.right_tab && neighbour->leader_on_left()))
      continue;  // Neighbours of leaders are not allowed to be used.
    if (n_x <= x_at_n_y + p.r_align_tolerance &&
        n_x >= x_at_n_y - p.l_align_tolerance) {
      // Aligned so keep it. If it is a marked tab save it as result,
      // otherwise keep it as backup_result to return in case of later failure.
      if (WithinTestRegion(2, x_start, start_y))
        tprintf("aligned, seeking%d, l=%d, r=%d\n",
                p.right_tab, neighbour->left_tab_type(),
                neighbour->right_tab_type());
      TabType n_type = p.right_tab ? neighbour->right_tab_type()
                                   : neighbour->left_tab_type();
      if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) {
        if (result == nullptr) {
          result = neighbour;
        } else {
          // Keep the closest neighbour by Euclidean distance.
          // This prevents it from picking a tab blob in another column.
          const TBOX& old_box = result->bounding_box();
          int x_diff = p.right_tab ? old_box.right() : old_box.left();
          x_diff -= x_at_n_y;
          int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y;
          int old_dist = x_diff * x_diff + y_diff * y_diff;
          x_diff = n_x - x_at_n_y;
          y_diff = n_y - start_y;
          int new_dist = x_diff * x_diff + y_diff * y_diff;
          if (new_dist < old_dist)
            result = neighbour;
        }
      } else if (backup_result == nullptr) {
        if (WithinTestRegion(2, x_start, start_y))
          tprintf("Backup\n");
        backup_result = neighbour;
      } else {
        TBOX backup_box = backup_result->bounding_box();
        if ((p.right_tab && backup_box.right() < nbox.right()) ||
            (!p.right_tab && backup_box.left() > nbox.left())) {
          if (WithinTestRegion(2, x_start, start_y))
            tprintf("Better backup\n");
          backup_result = neighbour;
        }
      }
    }
  }
  return result != nullptr ? result : backup_result;
}
// Returns a Pix rendering of the blob. pixDestroy after use.
Pix* C_BLOB::render() {
  TBOX box = bounding_box();
  Pix* pix = pixCreate(box.width(), box.height(), 1);
  render_outline_list(&outlines, box.left(), box.top(), pix);
  return pix;
}
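// Illustrative sketch (not from the original source): typical use of the
// renderer above. The caller owns the returned Pix and must pixDestroy it,
// as the comment says. pixCountPixels is the standard Leptonica ON-pixel
// counter.
static l_int32 CountBlobPixelsSketch(C_BLOB* blob) {
  Pix* pix = blob->render();
  l_int32 count = 0;
  pixCountPixels(pix, &count, NULL);
  pixDestroy(&pix);
  return count;
}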
// Collects edges into the given bounding box, LLSQ accumulator and/or
// x_coords, y_coords vectors.
// For a description of x_coords/y_coords, see GetEdgeCoords above.
// Startpt to lastpt, inclusive, MUST have the same src_outline member,
// which may be NULL. The vector from lastpt to its next is included in
// the accumulation. Hidden edges should be excluded by the caller.
// The input denorm should be the normalizations that have been applied from
// the image to the current state of the TBLOB from which startpt, lastpt come.
// box is the bounding box of the blob from which the EDGEPTs are taken and
// indices into x_coords, y_coords are offset by box.botleft().
static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt,
                              const DENORM& denorm, const TBOX& box,
                              TBOX* bounding_box,
                              LLSQ* accumulator,
                              GenericVector<GenericVector<int> > *x_coords,
                              GenericVector<GenericVector<int> > *y_coords) {
  const C_OUTLINE* outline = startpt->src_outline;
  int x_limit = box.width() - 1;
  int y_limit = box.height() - 1;
  if (outline != NULL) {
    // Use higher-resolution edge points stored on the outline.
    // The outline coordinates may not match the binary image because of the
    // rotation for vertical text lines, but the root_denorm IS the matching
    // start of the DENORM chain.
    const DENORM* root_denorm = denorm.RootDenorm();
    int step_length = outline->pathlength();
    int start_index = startpt->start_step;
    // Note that if this run straddles the wrap-around point of the outline,
    // lastpt->start_step may have a lower index than startpt->start_step, and
    // we want to use an end_index that allows us to use a positive increment,
    // so we add step_length if necessary, but that may be beyond the bounds
    // of the outline steps due to wrap-around, so we use % step_length
    // everywhere, except for start_index.
    int end_index = lastpt->start_step + lastpt->step_count;
    if (end_index <= start_index)
      end_index += step_length;
    // pos is the integer coordinates of the binary image steps.
    ICOORD pos = outline->position_at_index(start_index);
    FCOORD origin(box.left(), box.bottom());
    // f_pos is a floating-point version of pos that offers improved edge
    // positioning using greyscale information or smoothing of edge steps.
    FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index);
    // pos_normed is f_pos after the appropriate normalization, and relative
    // to origin.
    // prev_normed is the previous value of pos_normed.
    FCOORD prev_normed;
    denorm.NormTransform(root_denorm, f_pos, &prev_normed);
    prev_normed -= origin;
    for (int index = start_index; index < end_index; ++index) {
      ICOORD step = outline->step(index % step_length);
      // Only use the point if its edge strength is positive. This excludes
      // points that don't provide useful information, eg
      // ___________
      //            |___________
      // The vertical step provides only noisy, damaging information, as even
      // with a greyscale image, the positioning of the edge there may be a
      // fictitious extrapolation, so previous processing has eliminated it.
      if (outline->edge_strength_at_index(index % step_length) > 0) {
        FCOORD f_pos = outline->sub_pixel_pos_at_index(pos,
                                                       index % step_length);
        FCOORD pos_normed;
        denorm.NormTransform(root_denorm, f_pos, &pos_normed);
        pos_normed -= origin;
        // Accumulate the information that is selected by the caller.
        if (bounding_box != NULL) {
          SegmentBBox(pos_normed, prev_normed, bounding_box);
        }
        if (accumulator != NULL) {
          SegmentLLSQ(pos_normed, prev_normed, accumulator);
        }
        if (x_coords != NULL && y_coords != NULL) {
          SegmentCoords(pos_normed, prev_normed, x_limit, y_limit,
                        x_coords, y_coords);
        }
        prev_normed = pos_normed;
      }
      pos += step;
    }
  } else {
    // There is no outline, so we are forced to use the polygonal
    // approximation.
    const EDGEPT* endpt = lastpt->next;
    const EDGEPT* pt = startpt;
    do {
      FCOORD next_pos(pt->next->pos.x - box.left(),
                      pt->next->pos.y - box.bottom());
      FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom());
      if (bounding_box != NULL) {
        SegmentBBox(next_pos, pos, bounding_box);
      }
      if (accumulator != NULL) {
        SegmentLLSQ(next_pos, pos, accumulator);
      }
      if (x_coords != NULL && y_coords != NULL) {
        SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords);
      }
    } while ((pt = pt->next) != endpt);
  }
}