// Builds a Leptonica BOX (via boxCreate) that encloses every blob in the
// given ColPartition, with the y axis flipped by subtracting from the
// height of im.
//   cp: partition whose boxes() list is scanned.
//   im: image whose height (im->h) is used for the y flip.
// Returns the box produced by boxCreate, or NULL if the partition holds no
// blobs (previously the INT_MAX/INT_MIN sentinels were subtracted, which
// overflowed and yielded a meaningless box).
BOX* M_Utils::getColPartImCoords(ColPartition* cp, PIX* im) {
  BLOBNBOX_CLIST* blobnboxes = cp->boxes();
  CLIST_ITERATOR bbox_it(blobnboxes);
  const l_int32 height = static_cast<l_int32>(im->h);
  l_int32 left = INT_MAX;
  l_int32 right = INT_MIN;
  l_int32 top = INT_MAX;
  l_int32 bottom = INT_MIN;
  bool found_blob = false;
  for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
    BLOBNBOX* blobnbox = static_cast<BLOBNBOX*>(bbox_it.data());
    const l_int32 l =
        static_cast<l_int32>(blobnbox->cblob()->bounding_box().left());
    const l_int32 r =
        static_cast<l_int32>(blobnbox->cblob()->bounding_box().right());
    // Flip y: the blob's top edge becomes the smaller image-space y.
    const l_int32 t =
        height - static_cast<l_int32>(blobnbox->cblob()->bounding_box().top());
    const l_int32 b =
        height -
        static_cast<l_int32>(blobnbox->cblob()->bounding_box().bottom());
    if (l < left) left = l;
    if (r > right) right = r;
    if (t < top) top = t;
    if (b > bottom) bottom = b;
    found_blob = true;
  }
  // Nothing to enclose: avoid computing INT_MIN - INT_MAX below.
  if (!found_blob) return NULL;
  return boxCreate(left, top, right - left, bottom - top);
}
// Helper to compute edge offsets for all the blobs on the list. // See coutln.h for an explanation of edge offsets. void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey, BLOBNBOX_LIST* blobs) { int grey_height = 0; int thr_height = 0; int scale_factor = 1; if (thresholds != NULL && grey != NULL) { grey_height = pixGetHeight(grey); thr_height = pixGetHeight(thresholds); scale_factor = IntCastRounded(static_cast<double>(grey_height) / thr_height); } BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (blob->cblob() != NULL) { // Get the threshold that applies to this blob. l_uint32 threshold = 128; if (thresholds != NULL && grey != NULL) { const TBOX& box = blob->cblob()->bounding_box(); // Transform the coordinates if required. TPOINT pt((box.left() + box.right()) / 2, (box.top() + box.bottom()) / 2); pixGetPixel(thresholds, pt.x / scale_factor, thr_height - 1 - pt.y / scale_factor, &threshold); } blob->cblob()->ComputeEdgeOffsets(threshold, grey); } } }
// Inserts a list of blobs into the projection. // Rotation is a multiple of 90 degrees to get from blob coords to // nontext_map coords, nontext_map_box is the bounds of the nontext_map. // Blobs are spread horizontally or vertically according to their internal // flags, but the spreading is truncated by set pixels in the nontext_map // and also by the horizontal rule line limits on the blobs. void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation, const TBOX& nontext_map_box, Pix* nontext_map) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX bbox = blob->bounding_box(); ICOORD middle((bbox.left() + bbox.right()) / 2, (bbox.bottom() + bbox.top()) / 2); bool spreading_horizontally = PadBlobBox(blob, &bbox); // Rotate to match the nontext_map. bbox.rotate(rotation); middle.rotate(rotation); if (rotation.x() == 0.0f) spreading_horizontally = !spreading_horizontally; // Clip to the image before applying the increments. bbox &= nontext_map_box; // This is in-place box intersection. // Check for image pixels before spreading. TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, nontext_map, &bbox); if (bbox.area() > 0) { IncrementRectangle8Bit(bbox); } } }
// Computes the noise_density_ by summing the number of elements in a // neighbourhood of each grid cell. void StrokeWidth::ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid) { // Run a preliminary strokewidth neighbour detection on the medium blobs. line_grid->InsertBlobList(true, true, false, &block->blobs, false, this); BLOBNBOX_IT blob_it(&block->blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { SetNeighbours(false, blob_it.data()); } // Remove blobs with a good strokewidth neighbour from the grid. for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (blob->GoodTextBlob() > 0) RemoveBBox(blob); blob->ClearNeighbours(); } // Insert the smaller blobs into the grid. line_grid->InsertBlobList(true, true, false, &block->small_blobs, false, this); line_grid->InsertBlobList(true, true, false, &block->noise_blobs, false, this); if (noise_density_ != NULL) delete noise_density_; IntGrid* cell_counts = CountCellElements(); noise_density_ = cell_counts->NeighbourhoodSum(); delete cell_counts; // Clear the grid as we don't want the small stuff hanging around in it. Clear(); }
// Helper function to divide the input blobs over noise, small, medium // and large lists. Blobs small in height and (small in width or large in width) // go in the noise list. Dash (-) candidates go in the small list, and // medium and large are by height. // SIDE-EFFECT: reset all blobs to initial state by calling Init(). static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST* src_list, BLOBNBOX_LIST* noise_list, BLOBNBOX_LIST* small_list, BLOBNBOX_LIST* medium_list, BLOBNBOX_LIST* large_list) { BLOBNBOX_IT noise_it(noise_list); BLOBNBOX_IT small_it(small_list); BLOBNBOX_IT medium_it(medium_list); BLOBNBOX_IT large_it(large_list); for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { BLOBNBOX* blob = src_it.extract(); blob->ReInit(); int width = blob->bounding_box().width(); int height = blob->bounding_box().height(); if (height < min_height && (width < min_height || width > max_height)) noise_it.add_after_then_move(blob); else if (height > max_height) large_it.add_after_then_move(blob); else if (height < min_height) small_it.add_after_then_move(blob); else medium_it.add_after_then_move(blob); } }
// Inserts all the blobs from the given list, with x and y spreading, // without removing from the source list, so ownership remains with the // source list. void BlobGrid::InsertBlobList(BLOBNBOX_LIST * blobs) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX *blob = blob_it.data(); if (!blob->joined_to_prev()) InsertBBox(true, true, blob); } }
// Deletes the C_BLOB held by every BLOBNBOX on the list. The list entries
// themselves are left in place.
// A BLOBNBOX generally doesn't own its blob, so where it does, the blob has
// to be deleted explicitly like this.
static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
  BLOBNBOX_IT box_it(boxes);
  box_it.mark_cycle_pt();
  while (!box_it.cycled_list()) {
    delete box_it.data()->cblob();
    box_it.forward();
  }
}
// Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* TabVector::VerticalTextlinePartner() { if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } if (num_unmatched + num_matched == 0) return NULL; double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, 
num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
// NULLs out any neighbours that are DeletableNoise to remove references. void BLOBNBOX::CleanNeighbours() { for (int dir = 0; dir < BND_COUNT; ++dir) { BLOBNBOX* neighbour = neighbours_[dir]; if (neighbour != NULL && neighbour->DeletableNoise()) { neighbours_[dir] = NULL; good_stroke_neighbours_[dir] = false; } } }
// Helper to delete all the deletable blobs on the list. void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (blob->DeletableNoise()) { delete blob->cblob(); delete blob_it.extract(); } } }
// Print basic information about this tab vector and every box in it. void TabVector::Debug(const char* prefix) { Print(prefix); BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); const TBOX& box = bbox->bounding_box(); tprintf("Box at (%d,%d)->(%d,%d)\n", box.left(), box.bottom(), box.right(), box.top()); } }
// Returns the number of side neighbours that are of type BRT_NOISE. int BLOBNBOX::NoisyNeighbours() const { int count = 0; for (int dir = 0; dir < BND_COUNT; ++dir) { BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir); BLOBNBOX* blob = neighbour(bnd); if (blob != NULL && blob->region_type() == BRT_NOISE) ++count; } return count; }
// Finds horizontal line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindHorizontalLines(int resolution, Pix* pix, TabVector_LIST* vectors) { #ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(height, width); int vertical_x, vertical_y; FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); // Coords are in tess format so filp x and y and then covert // to leptonica by height -y. 
Box* pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 3, 1); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("hlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(vertical_x, vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); // Iterate the vectors to flip them. TabVector_IT h_it(vectors); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { h_it.data()->XYFlip(); } } pixDestroy(&line_pix); #endif }
// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the // given list in the given body_colour, with child outlines in the // child_colour. void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list, ScrollView::Color body_colour, ScrollView::Color child_colour, ScrollView* win) { BLOBNBOX_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); if (blob->DeletableNoise()) blob->plot(win, body_colour, child_colour); } }
// Returns the box gaps between this and its neighbours_ in an array // indexed by BlobNeighbourDir. void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { for (int dir = 0; dir < BND_COUNT; ++dir) { gaps[dir] = MAX_INT16; BLOBNBOX* neighbour = neighbours_[dir]; if (neighbour != NULL) { TBOX n_box = neighbour->bounding_box(); if (dir == BND_LEFT || dir == BND_RIGHT) { gaps[dir] = box.x_gap(n_box); } else { gaps[dir] = box.y_gap(n_box); } } } }
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright // are the bounds of the image on which the input line_bblobs were found. // The input line_bblobs list is const really. // The output vertical_x and vertical_y are the total of all the vectors. // The output list of TabVector makes no reference to the input BLOBNBOXes. void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, BLOBNBOX_LIST* line_bblobs, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { BLOBNBOX_IT bbox_it(line_bblobs); int b_count = 0; // Put all the blobs into the grid to find the lines, and move the blobs // to the output lists. AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* bblob = bbox_it.data(); bblob->set_left_tab_type(TT_UNCONFIRMED); bblob->set_left_rule(bleft.x()); bblob->set_right_rule(tright.x()); bblob->set_left_crossing_rule(bleft.x()); bblob->set_right_crossing_rule(tright.x()); blob_grid.InsertBBox(false, true, bblob); ++b_count; } if (textord_debug_tabfind) tprintf("Inserted %d line blobs into grid\n", b_count); if (b_count == 0) return; // Search the entire grid, looking for vertical line vectors. 
GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid); BLOBNBOX* bbox; TabVector_IT vector_it(vectors); *vertical_x = 0; *vertical_y = 1; lsearch.StartFullSearch(); while ((bbox = lsearch.NextFullSearch()) != NULL) { if (bbox->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = bbox->bounding_box(); if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), box.bottom()); AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y); if (vector != NULL) { vector->Freeze(); vector_it.add_to_end(vector); } } } ScrollView* line_win = NULL; if (textord_tabfind_show_vlines) { line_win = blob_grid.MakeWindow(0, 50, "Vlines"); blob_grid.DisplayBoxes(line_win); line_win = blob_grid.DisplayTabs("Vlines", line_win); } }
// Moves blobs that look like they don't sit well on a textline from the // input blobs list to the output small_blobs list. // This gets them away from initial textline finding to stop diacritics // from forming incorrect textlines. (Introduced mainly to fix Thai.) void TextlineProjection::MoveNonTextlineBlobs( BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const { BLOBNBOX_IT it(blobs); BLOBNBOX_IT small_it(small_blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); const TBOX& box = blob->bounding_box(); bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) { blob->ClearNeighbours(); small_it.add_to_end(it.extract()); } } }
// Returns true if other has a similar stroke width to this. bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other, double fractional_tolerance, double constant_tolerance) const { // The perimeter-based width is used as a backup in case there is // no information in the blob. double p_width = area_stroke_width(); double n_p_width = other.area_stroke_width(); float h_tolerance = horz_stroke_width_ * fractional_tolerance + constant_tolerance; float v_tolerance = vert_stroke_width_ * fractional_tolerance + constant_tolerance; double p_tolerance = p_width * fractional_tolerance + constant_tolerance; bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, other.horz_stroke_width_, h_tolerance); bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, other.vert_stroke_width_, v_tolerance); bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); // For a match, at least one of the horizontal and vertical widths // must match, and the other one must either match or be zero. // Only if both are zero will we look at the perimeter metric. return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); }
// Returns the bounding box of the blob the iterator currently points at,
// then advances the iterator past that blob and any following blobs that
// are joined_to_prev(), leaving it on the next real blob.
//   it: iterator to the blobs; moved forward at least once.
TBOX box_next_pre_chopped(BLOBNBOX_IT *it) {
  TBOX result = it->data()->bounding_box();
  // Always step once, then keep stepping over joined blobs.
  it->forward();
  while (it->data()->joined_to_prev()) {
    it->forward();
  }
  return result;
}
// Finds vertical line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vertical_x and vertical_y contain a sum of the output vectors, // thereby giving the mean vertical direction. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindVerticalLines(int resolution, Pix* pix, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { #ifdef HAVE_LIBLEPT Pix* line_pix; Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(width, height); FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. 
bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); Box* pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 1, 3); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("vlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(*vertical_x, *vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); } pixDestroy(&line_pix); #endif }
// Display the blobs in the window colored according to textline quality. void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win) { #ifndef GRAPHICS_DISABLED BLOBNBOX_IT it(blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); const TBOX& box = blob->bounding_box(); bool bad_box = BoxOutOfHTextline(box, NULL, false); if (blob->UniquelyVertical()) win->Pen(ScrollView::YELLOW); else win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE); win->Rectangle(box.left(), box.bottom(), box.right(), box.top()); } win->Update(); #endif // GRAPHICS_DISABLED }
// Display the tab codes of the BLOBNBOXes in this grid. ScrollView* AlignedBlob::DisplayTabs(const char* window_name, ScrollView* tab_win) { #ifndef GRAPHICS_DISABLED if (tab_win == nullptr) tab_win = MakeWindow(0, 50, window_name); // For every tab in the grid, display it. GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != nullptr) { const TBOX& box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); int bottom_y = box.bottom(); TabType tabtype = bbox->left_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_MAYBE_ALIGNED) tab_win->Pen(ScrollView::BLUE); else if (tabtype == TT_MAYBE_RAGGED) tab_win->Pen(ScrollView::YELLOW); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::GREEN); else tab_win->Pen(ScrollView::GREY); tab_win->Line(left_x, top_y, left_x, bottom_y); } tabtype = bbox->right_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_MAYBE_ALIGNED) tab_win->Pen(ScrollView::MAGENTA); else if (tabtype == TT_MAYBE_RAGGED) tab_win->Pen(ScrollView::ORANGE); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::RED); else tab_win->Pen(ScrollView::GREY); tab_win->Line(right_x, top_y, right_x, bottom_y); } } tab_win->Update(); #endif return tab_win; }
// Sets up displacement_modes_ with the top few modes of the perpendicular // distance of each blob from the given direction vector, after rounding. void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { // Set of perpendicular displacements of the blob bottoms from the required // baseline direction. GenericVector<double> perp_blob_dists; displacement_modes_.truncate(0); // Gather the skew-corrected position of every blob. double min_dist = MAX_FLOAT32; double max_dist = -MAX_FLOAT32; BLOBNBOX_IT blob_it(blobs_); bool debug = false; for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); const TBOX& box = blob->bounding_box(); #ifdef kDebugYCoord if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true; #endif FCOORD blob_pos((box.left() + box.right()) / 2.0f, blob->baseline_position()); double offset = direction * blob_pos; perp_blob_dists.push_back(offset); if (debug) { tprintf("Displacement %g for blob at:", offset); box.print(); } UpdateRange(offset, &min_dist, &max_dist); } // Set up a histogram using disp_quant_factor_ as the bucket size. STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), IntCastRounded(max_dist / disp_quant_factor_) + 1); for (int i = 0; i < perp_blob_dists.size(); ++i) { dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1); } GenericVector<KDPairInc<float, int> > scaled_modes; dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes); if (debug) { for (int i = 0; i < scaled_modes.size(); ++i) { tprintf("Top mode = %g * %d\n", scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data); } } for (int i = 0; i < scaled_modes.size(); ++i) displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key); }
// Returns true if the given blob overlaps more than max_overlaps blobs // in the current grid. bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { // Search the grid to see what intersects it. // Setup a Rectangle search for overlapping this blob. BlobGridSearch rsearch(this); TBOX box = blob->bounding_box(); rsearch.StartRectSearch(box); rsearch.SetUniqueMode(true); BLOBNBOX* neighbour; int overlap_count = 0; while (overlap_count <= max_overlaps && (neighbour = rsearch.NextRectSearch()) != NULL) { if (box.major_overlap(neighbour->bounding_box())) { ++overlap_count; if (overlap_count > max_overlaps) return true; } } return false; }
//yangjing01 modified : bool TAL_make_single_word(bool one_blob, TO_ROW_LIST* rows, ROW_LIST* real_rows) { TO_ROW_IT to_row_it(rows); ROW_IT row_it(real_rows); //to_real_row is the real row information of single row or single char mode TO_ROW* real_to_row = NULL; float row_max_height = 0.0; for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); to_row_it.forward()){ TO_ROW* row = to_row_it.data(); float row_min_y = row->min_y(); float row_max_y = row->max_y(); float row_height = abs(row_max_y - row_min_y); if (real_to_row == NULL || row_height > row_max_height || fabs(row_height - row_max_height) < 1.0f){ row_max_height = row_height; real_to_row = row; } } if (real_to_row == NULL){ return false; } C_BLOB_LIST cblobs; C_BLOB_IT cblob_it(&cblobs); BLOBNBOX_IT box_it(real_to_row->blob_list()); for (; !box_it.empty(); box_it.forward()){ BLOBNBOX* bblob = box_it.extract(); if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { if (bblob->cblob() != NULL){ C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); cout_it.move_to_last(); cout_it.add_list_after(bblob->cblob()->out_list()); delete bblob->cblob(); } } else { if (bblob->cblob() != NULL) cblob_it.add_after_then_move(bblob->cblob()); } delete bblob; } // Convert the TO_ROW to a ROW. ROW* real_row = new ROW(real_to_row, static_cast<inT16>(real_to_row->kern_size), static_cast<inT16>(real_to_row->space_size)); WERD_IT word_it(real_row->word_list()); WERD* word = new WERD(&cblobs, 0, NULL); word->set_flag(W_BOL, TRUE); word->set_flag(W_EOL, TRUE); word->set_flag(W_DONT_CHOP, one_blob); word_it.add_after_then_move(word); row_it.add_after_then_move(real_row); return true; }
// Extend this vector to include the supplied blob if it doesn't // already have it. void TabVector::ExtendToBox(BLOBNBOX* new_blob) { TBOX new_box = new_blob->bounding_box(); BLOBNBOX_C_IT it(&boxes_); if (!it.empty()) { BLOBNBOX* blob = it.data(); TBOX box = blob->bounding_box(); while (!it.at_last() && box.top() <= new_box.top()) { if (blob == new_blob) return; // We have it already. it.forward(); blob = it.data(); box = blob->bounding_box(); } if (box.top() >= new_box.top()) { it.add_before_stay_put(new_blob); needs_refit_ = true; return; } } needs_refit_ = true; it.add_after_stay_put(new_blob); }
void TO_ROW::compute_vertical_projection() { //project whole row TBOX row_box; //bound of row BLOBNBOX *blob; //current blob TBOX blob_box; //bounding box BLOBNBOX_IT blob_it = blob_list (); if (blob_it.empty ()) return; row_box = blob_it.data ()->bounding_box (); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) row_box += blob_it.data ()->bounding_box (); projection.set_range (row_box.left () - PROJECTION_MARGIN, row_box.right () + PROJECTION_MARGIN); projection_left = row_box.left () - PROJECTION_MARGIN; projection_right = row_box.right () + PROJECTION_MARGIN; for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.data(); if (blob->cblob() != NULL) vertical_cblob_projection(blob->cblob(), &projection); } }
// Tests each blob in the list to see if it is certain non-text using 2 // conditions: // 1. blob overlaps a cell with high value in noise_density_ (previously set // by ComputeNoiseDensity). // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This // condition is disabled with max_blob_overlaps == -1. // If it does, the blob is declared non-text, and is used to mark up the // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their // neighbours reset, as they may now point to deleted data. // WARNING: The blobs list blobs may be in the *this grid, but they are // not removed. If any deleted blobs might be in *this, then this must be // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not NULL, deleted blobs are drawn on it in red, and kept // blobs are drawn on it in ok_color. void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, int max_blob_overlaps, ScrollView* win, ScrollView::Color ok_color, Pix* nontext_mask) { int imageheight = tright().y() - bleft().x(); BLOBNBOX_IT blob_it(blobs); BLOBNBOX_LIST dead_blobs; BLOBNBOX_IT dead_it(&dead_blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX box = blob->bounding_box(); if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { blob->ClearNeighbours(); #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ok_color, ok_color); #endif // GRAPHICS_DISABLED } else { if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. 
Pix* blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, 0); pixDestroy(&blob_pix); } else { if (box.area() < gridsize() * gridsize()) { // It is a really bad idea to make lots of small components in the // photo mask, so try to join it to a bigger area by expanding the // box in a way that does not touch any zero noise density cell. box = AttemptBoxExpansion(box, *noise_density_, gridsize()); } // All overlapped cells are non-zero, so just mark the rectangle. pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SET, NULL, 0, 0); } #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ScrollView::RED, ScrollView::RED); #endif // GRAPHICS_DISABLED // It is safe to delete the cblob now, as it isn't used by the grid // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the // dead_blobs list. // TODO(rays) delete the delete when the BLOBNBOX destructor deletes // the cblob. delete blob->cblob(); dead_it.add_to_end(blob_it.extract()); } } }
// Returns the bounding box of the blob the iterator points at, extended by
// the boxes of any immediately following pre-chopped blobs (those with
// neither a blob() nor a cblob()).
// The iterator is advanced at least once and is left on the next blob that
// is not pre-chopped and not joined_to_prev().
//   it: iterator to the blobs.
BOX box_next(BLOBNBOX_IT *it) {
  BOX result;  // total box
  result = it->data()->bounding_box();
  bool keep_going = true;
  while (keep_going) {
    it->forward();
    BLOBNBOX *blob = it->data();
    const bool pre_chopped = (blob->blob() == NULL && blob->cblob() == NULL);
    if (pre_chopped) {
      // was pre-chopped: fold its box into the total
      result += blob->bounding_box();
    }
    // Continue until the next real blob.
    keep_going = pre_chopped || blob->joined_to_prev();
  }
  return result;
}
// Builds a new WERD from the next blobcount blobs at the iterator.
//   box_it: iterator to the blobs; each used blob is extracted and deleted,
//           and the iterator is advanced past it.
//   blobcount: number of blobs to use.
//   bol: true if the word is at the start of a line (sets W_BOL).
//   blanks: number of blanks preceding the word; values below 1 are
//           raised to 1.
// A blob that is joined_to_prev() has its outlines appended to the previous
// C_BLOB of the word and its own C_BLOB deleted. If there is no previous
// C_BLOB yet (the word starts on a joined blob), the blob starts a new
// C_BLOB instead of dereferencing an empty list.
// W_EOL is set if the iterator ends up at the first element of its list.
// Returns the new word, allocated with new.
WERD *make_real_word(BLOBNBOX_IT *box_it,  // iterator
                     int32_t blobcount,    // no of blobs to use
                     bool bol,             // start of line
                     uint8_t blanks        // no of blanks
                     ) {
  C_OUTLINE_IT cout_it;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;

  for (int32_t blobindex = 0; blobindex < blobcount; blobindex++) {
    BLOBNBOX *bblob = box_it->extract();
    if (bblob->cblob() != nullptr) {
      if (bblob->joined_to_prev() && !cblob_it.empty()) {
        // Merge this blob's outlines into the previous blob.
        cout_it.set_to_list(cblob_it.data()->out_list());
        cout_it.move_to_last();
        cout_it.add_list_after(bblob->cblob()->out_list());
        delete bblob->cblob();
      } else {
        cblob_it.add_after_then_move(bblob->cblob());
      }
    }
    delete bblob;
    box_it->forward();  // next one
  }

  if (blanks < 1)
    blanks = 1;

  WERD *word = new WERD(&cblobs, blanks, nullptr);

  if (bol)
    word->set_flag(W_BOL, true);
  if (box_it->at_first())
    word->set_flag(W_EOL, true);  // at end of line

  return word;
}