// Internal version of EvaluateBox returns the unclipped gradients as well // as the result of EvaluateBox. // hgrad1 and hgrad2 are the gradients for the horizontal textline. int TextlineProjection::EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, int* hgrad1, int* hgrad2, int* vgrad1, int* vgrad2) const { int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true); int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false); int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true); int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), box.bottom(), box.top(), false); int top_clipped = MAX(top_gradient, 0); int bottom_clipped = MAX(bottom_gradient, 0); int left_clipped = MAX(left_gradient, 0); int right_clipped = MAX(right_gradient, 0); if (debug) { tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", top_gradient, bottom_gradient, left_gradient, right_gradient); box.print(); } int result = MAX(top_clipped, bottom_clipped) - MAX(left_clipped, right_clipped); if (hgrad1 != NULL && hgrad2 != NULL) { *hgrad1 = top_gradient; *hgrad2 = bottom_gradient; } if (vgrad1 != NULL && vgrad2 != NULL) { *vgrad1 = left_gradient; *vgrad2 = right_gradient; } return result; }
// Compute the distance from the from_box to the to_box using curved // projection space. Separation that involves a decrease in projection // density (moving from the from_box to the to_box) is weighted more heavily // than constant density, and an increase is weighted less. // If horizontal_textline is true, then curved space is used vertically, // as for a diacritic on the edge of a textline. // The projection uses original image coords, so denorm is used to get // back to the image coords from box/part space. // How the calculation works: Think of a diacritic near a textline. // Distance is measured from the far side of the from_box to the near side of // the to_box. Shown is the horizontal textline case. // |------^-----| // | from | box | // |------|-----| // perpendicular | // <------v-------->|--------------------| // parallel | to box | // |--------------------| // Perpendicular distance uses "curved space" See VerticalDistance below. // Parallel distance is linear. // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio. int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, bool horizontal_textline, const DENORM* denorm, bool debug) const { // The parallel_gap is the horizontal gap between a horizontal textline and // the box. Analogous for vertical. int parallel_gap = 0; // start_pt is the box end of the line to be modified for curved space. TPOINT start_pt; // end_pt is the partition end of the line to be modified for curved space. 
TPOINT end_pt; if (horizontal_textline) { parallel_gap = from_box.x_gap(to_box) + from_box.width(); start_pt.x = (from_box.left() + from_box.right()) / 2; end_pt.x = start_pt.x; if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) { start_pt.y = from_box.top(); end_pt.y = MIN(to_box.top(), start_pt.y); } else { start_pt.y = from_box.bottom(); end_pt.y = MAX(to_box.bottom(), start_pt.y); } } else { parallel_gap = from_box.y_gap(to_box) + from_box.height(); if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) { start_pt.x = from_box.right(); end_pt.x = MIN(to_box.right(), start_pt.x); } else { start_pt.x = from_box.left(); end_pt.x = MAX(to_box.left(), start_pt.x); } start_pt.y = (from_box.bottom() + from_box.top()) / 2; end_pt.y = start_pt.y; } // The perpendicular gap is the max vertical distance gap out of: // top of from_box to to_box top and bottom of from_box to to_box bottom. // This value is then modified for curved projection space. // Analogous for vertical. int perpendicular_gap = 0; // If start_pt == end_pt, then the from_box lies entirely within the to_box // (in the perpendicular direction), so we don't need to calculate the // perpendicular_gap. if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { if (denorm != NULL) { // Denormalize the start and end. denorm->DenormTransform(NULL, start_pt, &start_pt); denorm->DenormTransform(NULL, end_pt, &end_pt); } if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y); } else { perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y); } } // The parallel_gap weighs less than the perpendicular_gap. return perpendicular_gap + parallel_gap / kParaPerpDistRatio; }
// Formats box as "Bounding box=(left,y1)->(right,y2)", where the y values are
// measured downwards from `height` (y1 = height - top, y2 = height - bottom).
// A trailing newline is appended when eol is true.
QString printTBOX(TBOX box, int height, bool eol) {
  QString text = QString("Bounding box=(%1,%2)->(%3,%4)")
                     .arg(box.left())
                     .arg(height - box.top())
                     .arg(box.right())
                     .arg(height - box.bottom());
  if (eol) {
    text += QString("\n");
  }
  return text;
}
// Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* TabVector::VerticalTextlinePartner() { if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } if (num_unmatched + num_matched == 0) return NULL; double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, 
num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
// Initialize from box coordinates. POLY_BLOCK::POLY_BLOCK(const TBOX& box, PolyBlockType t) { vertices.clear(); ICOORDELT_IT v = &vertices; v.move_to_first(); v.add_to_end(new ICOORDELT(box.left(), box.top())); v.add_to_end(new ICOORDELT(box.left(), box.bottom())); v.add_to_end(new ICOORDELT(box.right(), box.bottom())); v.add_to_end(new ICOORDELT(box.right(), box.top())); compute_bb(); type = t; }
/**
 * @name start_seam_list
 *
 * Resets seam_array and fills it with one seam per pair of adjacent blobs in
 * word, i.e. NumBlobs() - 1 seams. Each seam carries only a location:
 * x is midway between the right edge of the left blob and the left edge of
 * the right blob, y is the mean of the four top/bottom edges of the pair.
 */
void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
  seam_array->truncate(0);
  const int num_blobs = word->NumBlobs();
  for (int right = 1; right < num_blobs; ++right) {
    const TBOX left_box = word->blobs[right - 1]->bounding_box();
    const TBOX right_box = word->blobs[right]->bounding_box();
    TPOINT location;
    location.x = (left_box.right() + right_box.left()) / 2;
    location.y = (left_box.bottom() + left_box.top() +
                  right_box.bottom() + right_box.top()) / 4;
    seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL));
  }
}
bool PIXROW::bad_box( //return true if box exceeds image int xsize, int ysize) const { TBOX bbox = bounding_box (); if (bbox.left () < 0 || bbox.right () > xsize || bbox.top () > ysize || bbox.bottom () < 0) { tprintf("Box (%d,%d)->(%d,%d) bad compared to %d,%d\n", bbox.left(),bbox.bottom(), bbox.right(), bbox.top(), xsize, ysize); return true; } return false; }
/**********************************************************************
 * render_segmentation
 *
 * Draws every chunk in the list into window, switching to the next entry
 * of color_list each time a new character starts according to the supplied
 * segmentation, then zooms the window to the union of all chunk boxes.
 * segmentation[0] is read as the number of entries that follow, and
 * segmentation[k + 1] as the chunk count consulted when character k starts.
 **********************************************************************/
void render_segmentation(ScrollView *window,
                         TBLOB *chunks,
                         SEARCH_STATE segmentation) {
  C_COL color = Black;
  int char_num = -1;     // Index of the character currently being drawn.
  int chunks_left = 0;   // Countdown until the next character starts.
  TBOX bbox;             // Union of all chunk bounding boxes.

  if (chunks)
    bbox = chunks->bounding_box();

  TBLOB *blob = chunks;
  while (blob != NULL) {
    bbox += blob->bounding_box();
    // The countdown is tested first and decremented regardless of the
    // outcome, exactly like a post-decrement inside the condition.
    const bool starts_new_char = (chunks_left == 0);
    --chunks_left;
    if (starts_new_char) {
      ++char_num;
      color = color_list[char_num % NUM_COLORS];
      if (char_num < segmentation[0])
        chunks_left = segmentation[char_num + 1];
      else
        chunks_left = MAX_INT32;  // Past the end: remaining chunks share it.
    }
    render_outline(window, blob->outlines, color);
    blob = blob->next;
  }
  window->ZoomToRectangle(bbox.left(), bbox.top(),
                          bbox.right(), bbox.bottom());
}
// Debug helper: prints the bounding box of neighbour together with three
// stroke-width figures, each to one decimal place:
//   h-width: neighbour->horz_stroke_width()
//   v-width: neighbour->vert_stroke_width()
//   p-width: 2 * area / perimeter of the underlying cblob.
static void PrintBoxWidths(BLOBNBOX* neighbour) {
  const TBOX nbox = neighbour->bounding_box();
  // The p-width conversion used to be "%1.f" (field width 1, precision 0),
  // which dropped the fractional part; "%.1f" matches the other two fields.
  tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
          nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
          neighbour->horz_stroke_width(), neighbour->vert_stroke_width(),
          2.0 * neighbour->cblob()->area() / neighbour->cblob()->perimeter());
}
// Generates a TrainingSample from a TBLOB. Extracts features and sets // the bounding box, so classifiers that operate on the image can work. // TODO(rays) Make BlobToTrainingSample a member of Classify now that // the FlexFx and FeatureDescription code have been removed and LearnBlob // is now a member of Classify. TrainingSample* BlobToTrainingSample( const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, GenericVector<INT_FEATURE_STRUCT>* bl_features) { GenericVector<INT_FEATURE_STRUCT> cn_features; Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, fx_info, nullptr); // TODO(rays) Use blob->PreciseBoundingBox() instead. TBOX box = blob.bounding_box(); TrainingSample* sample = nullptr; int num_features = fx_info->NumCN; if (num_features > 0) { sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], num_features); } if (sample != nullptr) { // Set the bounding box (in original image coordinates) in the sample. TPOINT topleft, botright; topleft.x = box.left(); topleft.y = box.top(); botright.x = box.right(); botright.y = box.bottom(); TPOINT original_topleft, original_botright; blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); blob.denorm().DenormTransform(nullptr, botright, &original_botright); sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y)); } return sample; }
// Sets up the DENORM to execute a non-linear transformation based on // preserving an even distribution of stroke edges. The transformation // operates only within the given box. // x_coords is a collection of the x-coords of vertical edges for each // y-coord starting at box.bottom(). // y_coords is a collection of the y-coords of horizontal edges for each // x-coord starting at box.left(). // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. // The second-level vectors must all be sorted in ascending order. // See comments on the helper functions above for more details. void DENORM::SetupNonLinear( const DENORM* predecessor, const TBOX& box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector<GenericVector<int> >& x_coords, const GenericVector<GenericVector<int> >& y_coords) { Clear(); predecessor_ = predecessor; // x_map_ and y_map_ store a mapping from input x and y coordinate to output // x and y coordinate, based on scaling to the supplied target_width and // target_height. x_map_ = new GenericVector<float>; y_map_ = new GenericVector<float>; // Set a 2-d image array to the run lengths at each pixel. int width = box.width(); int height = box.height(); GENERIC_2D_ARRAY<int> minruns(width, height, 0); ComputeRunlengthImage(box, x_coords, y_coords, &minruns); // Edge density is the sum of the inverses of the run lengths. Compute // edge density projection profiles. ComputeEdgeDensityProfiles(box, minruns, x_map_, y_map_); // Convert the edge density profiles to the coordinates by multiplying by // the desired size and accumulating. 
(*x_map_)[width] = target_width; for (int x = width - 1; x >= 0; --x) { (*x_map_)[x] = (*x_map_)[x + 1] - (*x_map_)[x] * target_width; } (*y_map_)[height] = target_height; for (int y = height - 1; y >= 0; --y) { (*y_map_)[y] = (*y_map_)[y + 1] - (*y_map_)[y] * target_height; } x_origin_ = box.left(); y_origin_ = box.bottom(); final_xshift_ = final_xshift; final_yshift_ = final_yshift; }
void OL_BUCKETS::extract_children( // recursive count C_OUTLINE *outline, // parent outline C_OUTLINE_IT *it // destination iterator ) { inT16 xmin, xmax; // coord limits inT16 ymin, ymax; inT16 xindex, yindex; // current bucket TBOX olbox; C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); xmin =(olbox.left() - bl.x()) / BUCKETSIZE; xmax =(olbox.right() - bl.x()) / BUCKETSIZE; ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; ymax =(olbox.top() - bl.y()) / BUCKETSIZE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { if (*child_it.data() < *outline) { it->add_after_then_move(child_it.extract()); } } } } }
// Scores a blob by the largest dimension (width or height) of any of its
// outlines. The score is doubled when the blob has more than 5 outlines, and
// then halved when the whole blob sits high (bottom above
// 4 * kBlnBaselineOffset) or low (top below kBlnBaselineOffset / 2).
float Tesseract::blob_noise_score(TBLOB *blob) {
  inT16 outline_count = 0;
  inT16 largest_outline_dimension = 0;

  for (TESSLINE* ol = blob->outlines; ol != NULL; ol = ol->next) {
    outline_count++;
    const TBOX ol_box = ol->bounding_box();
    const inT16 max_dimension =
        ol_box.height() > ol_box.width() ? ol_box.height() : ol_box.width();
    if (max_dimension > largest_outline_dimension)
      largest_outline_dimension = max_dimension;
  }

  if (outline_count > 5) {
    // Penalise blobs made of lots of pieces.
    largest_outline_dimension *= 2;
  }

  const TBOX blob_box = blob->bounding_box();
  const bool sits_high = blob_box.bottom() > kBlnBaselineOffset * 4;
  const bool sits_low = blob_box.top() < kBlnBaselineOffset / 2;
  if (sits_high || sits_low) {
    // Be lax about blobs that are high or low.
    largest_outline_dimension /= 2;
  }

  return largest_outline_dimension;
}
// Scores a PBLOB by the largest dimension (width or height) of any of its
// outlines. Only when fixsp_noise_score_fixing is set, the score is doubled
// for blobs with more than 5 outlines and then halved for blobs that sit
// high (bottom above 4 * bln_baseline_offset) or low (top below
// bln_baseline_offset / 2).
float blob_noise_score(PBLOB *blob) {
  inT16 outline_count = 0;
  inT16 largest_outline_dimension = 0;

  OUTLINE_IT outline_it;
  outline_it.set_to_list(blob->out_list());
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list();
       outline_it.forward()) {
    outline_count++;
    const TBOX ol_box = outline_it.data()->bounding_box();
    const inT16 max_dimension =
        ol_box.height() > ol_box.width() ? ol_box.height() : ol_box.width();
    if (max_dimension > largest_outline_dimension)
      largest_outline_dimension = max_dimension;
  }

  if (!fixsp_noise_score_fixing) {
    return largest_outline_dimension;
  }

  if (outline_count > 5) {
    // Penalise blobs made of lots of pieces.
    largest_outline_dimension *= 2;
  }
  const TBOX blob_box = blob->bounding_box();
  const bool sits_high = blob_box.bottom() > bln_baseline_offset * 4;
  const bool sits_low = blob_box.top() < bln_baseline_offset / 2;
  if (sits_high || sits_low) {
    // Be lax about blobs that are high or low.
    largest_outline_dimension /= 2;
  }
  return largest_outline_dimension;
}
// Inserts a list of blobs into the projection. // Rotation is a multiple of 90 degrees to get from blob coords to // nontext_map coords, nontext_map_box is the bounds of the nontext_map. // Blobs are spread horizontally or vertically according to their internal // flags, but the spreading is truncated by set pixels in the nontext_map // and also by the horizontal rule line limits on the blobs. void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation, const TBOX& nontext_map_box, Pix* nontext_map) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX bbox = blob->bounding_box(); ICOORD middle((bbox.left() + bbox.right()) / 2, (bbox.bottom() + bbox.top()) / 2); bool spreading_horizontally = PadBlobBox(blob, &bbox); // Rotate to match the nontext_map. bbox.rotate(rotation); middle.rotate(rotation); if (rotation.x() == 0.0f) spreading_horizontally = !spreading_horizontally; // Clip to the image before applying the increments. bbox &= nontext_map_box; // This is in-place box intersection. // Check for image pixels before spreading. TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, nontext_map, &bbox); if (bbox.area() > 0) { IncrementRectangle8Bit(bbox); } } }
// This function takes tif/box pair of files and runs recognition on the image, // while making sure that the word bounds that tesseract identified roughly // match to those specified by the input box file. For each word (ngram in a // single bounding box from the input box file) it outputs the ocred result, // the correct label, rating and certainty. void Tesseract::recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) { STRING box_fname = fname; const char *lastdot = strrchr(box_fname.string(), '.'); if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0'; box_fname += ".box"; // read_next_box() will close box_file FILE *box_file = open_file(box_fname.string(), "r"); PAGE_RES_IT page_res_it; page_res_it.page_res = page_res; page_res_it.restart_page(); char label[kBoxReadBufSize]; // Process all the words on this page. TBOX tbox; // tesseract-identified box TBOX bbox; // box from the box file bool keep_going; int line_number = 0; do { keep_going = read_t(&page_res_it, &tbox); keep_going &= read_b(applybox_page, &line_number, box_file, label, &bbox); // Align bottom left points of the TBOXes. while (keep_going && !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { keep_going = (bbox.bottom() < tbox.bottom()) ? read_t(&page_res_it, &tbox) : read_b(applybox_page, &line_number, box_file, label, &bbox); } while (keep_going && !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) { keep_going = (bbox.left() > tbox.left()) ? read_t(&page_res_it, &tbox) : read_b(applybox_page, &line_number, box_file, label, &bbox); } // OCR the word if top right points of the TBOXes are similar. if (keep_going && NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { ambigs_classify_and_output(page_res_it.prev_word(), page_res_it.prev_row(), page_res_it.prev_block(), label, output_file); } } while (keep_going); }
// Extract the OCR results, costs (penalty points for uncertainty), // and the bounding boxes of the characters. static void extract_result(ELIST_ITERATOR *out, PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); int word_count = 0; while (page_res_it.word() != NULL) { WERD_RES *word = page_res_it.word(); const char *str = word->best_choice->string().string(); const char *len = word->best_choice->lengths().string(); if (word_count) add_space(out); TBOX bln_rect; PBLOB_LIST *blobs = word->outword->blob_list(); PBLOB_IT it(blobs); int n = strlen(len); TBOX** boxes_to_fix = new TBOX*[n]; for (int i = 0; i < n; i++) { PBLOB *blob = it.data(); TBOX current = pblob_get_bbox(blob); bln_rect.bounding_union(current); TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), str, *len); tc->box = current; boxes_to_fix[i] = &tc->box; out->add_after_then_move(tc); it.forward(); str += *len; len++; } // Find the word bbox before normalization. // Here we can't use the C_BLOB bboxes directly, // since connected letters are not yet cut. TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); // Denormalize boxes by transforming the bbox of the whole bln word // into the denorm bbox (`real_rect') of the whole word. double x_stretch = double(real_rect.width()) / bln_rect.width(); double y_stretch = double(real_rect.height()) / bln_rect.height(); for (int j = 0; j < n; j++) { TBOX *box = boxes_to_fix[j]; int x0 = int(real_rect.left() + x_stretch * (box->left() - bln_rect.left()) + 0.5); int x1 = int(real_rect.left() + x_stretch * (box->right() - bln_rect.left()) + 0.5); int y0 = int(real_rect.bottom() + y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); int y1 = int(real_rect.bottom() + y_stretch * (box->top() - bln_rect.bottom()) + 0.5); *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); } delete [] boxes_to_fix; page_res_it.forward(); word_count++; } }
// Creates a box file string from a unichar string, TBOX and page number. void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, STRING* box_str) { *box_str = unichar_str; box_str->add_str_int(" ", box.left()); box_str->add_str_int(" ", box.bottom()); box_str->add_str_int(" ", box.right()); box_str->add_str_int(" ", box.top()); box_str->add_str_int(" ", page_num); }
// Returns true if any cell value in the given rectangle is zero. bool IntGrid::AnyZeroInRect(const TBOX& rect) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { if (GridCellValue(x, y) == 0) return true; } } return false; }
// Setup for a baseline normalization. If there are segs, then they // are used, otherwise, if there is a row, that is used, otherwise the // bottom of the word_box is used for the baseline. void DENORM::SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height, const TBOX& word_box, int num_segs, const DENORM_SEG* segs) { float scale = kBlnXHeight / x_height; float x_origin = (word_box.left() + word_box.right()) / 2.0f; float y_origin = 0.0f; if (num_segs == 0 && row == NULL) { y_origin = word_box.bottom(); } SetupNormalization(block, row, NULL, NULL, segs, num_segs, x_origin, y_origin, scale, scale, 0.0f, static_cast<float>(kBlnBaselineOffset)); }
// Returns the message length after an snprintf() append that reported
// `written` characters, given `used` characters already in a buffer of
// `bufsize` bytes. The result never exceeds bufsize - 1, so the next append
// always has room for at least the terminating NUL.
static int AdvanceMessageLength(int used, int written, int bufsize) {
  if (written < 0)
    return used;         // Formatting failed: treat as nothing appended.
  if (written >= bufsize - used)
    return bufsize - 1;  // Output was truncated to fit.
  return used + written;
}

// Posts a one-line description of the point (x, y) to image_win: the point
// itself, the baseline value at x for each row whose bounding box contains
// the point, and the bounding boxes of each word and each of its cblobs that
// contain the point.
// All text is appended with bounded writes, so a point that falls inside many
// rows/words/blobs yields a truncated message instead of overrunning the
// buffer.
void show_point(PAGE_RES* page_res, float x, float y) {
  FCOORD pt(x, y);
  PAGE_RES_IT pr_it(page_res);

  const int kBufsize = 512;
  char msg[kBufsize];
  msg[0] = '\0';
  int used = 0;  // Characters currently in msg, excluding the NUL.

  used = AdvanceMessageLength(
      used,
      snprintf(msg + used, static_cast<size_t>(kBufsize - used),
               "Pt:(%0.3f, %0.3f) ", x, y),
      kBufsize);

  for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
    // Report the baseline once per row, on the first word of the row.
    if (pr_it.row() != pr_it.prev_row() &&
        pr_it.row()->row->bounding_box().contains(pt)) {
      used = AdvanceMessageLength(
          used,
          snprintf(msg + used, static_cast<size_t>(kBufsize - used),
                   "BL(x)=%0.3f ", pr_it.row()->row->base_line(x)),
          kBufsize);
    }
    if (word->word->bounding_box().contains(pt)) {
      TBOX box = word->word->bounding_box();
      used = AdvanceMessageLength(
          used,
          snprintf(msg + used, static_cast<size_t>(kBufsize - used),
                   "Wd(%d, %d)/(%d, %d) ",
                   box.left(), box.bottom(), box.right(), box.top()),
          kBufsize);
      C_BLOB_IT cblob_it(word->word->cblob_list());
      for (cblob_it.mark_cycle_pt();
           !cblob_it.cycled_list();
           cblob_it.forward()) {
        C_BLOB* cblob = cblob_it.data();
        box = cblob->bounding_box();
        if (box.contains(pt)) {
          used = AdvanceMessageLength(
              used,
              snprintf(msg + used, static_cast<size_t>(kBufsize - used),
                       "CBlb(%d, %d)/(%d, %d) ",
                       box.left(), box.bottom(), box.right(), box.top()),
              kBufsize);
        }
      }
    }
  }
  image_win->AddMessage(msg);
}
/** * Returns the bounding rectangle of the current object at the given level in * the coordinates of the working image that is pix_binary(). * See comment on coordinate system above. * Returns false if there is no such object at the current position. */ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int* left, int* top, int* right, int* bottom) const { if (Empty(level)) return false; TBOX box; PARA *para = NULL; switch (level) { case RIL_BLOCK: box = it_->block()->block->bounding_box(); break; case RIL_PARA: para = it_->row()->row->para(); // explicit fall-through. case RIL_TEXTLINE: box = it_->row()->row->bounding_box(); break; case RIL_WORD: box = it_->word()->word->bounding_box(); break; case RIL_SYMBOL: if (cblob_it_ == NULL) box = it_->word()->box_word->BlobBox(blob_index_); else box = cblob_it_->data()->bounding_box(); } if (level == RIL_PARA) { PageIterator other = *this; other.Begin(); do { if (other.it_->block() && other.it_->block()->block == it_->block()->block && other.it_->row() && other.it_->row()->row && other.it_->row()->row->para() == para) { box = box.bounding_union(other.it_->row()->row->bounding_box()); } } while (other.Next(RIL_TEXTLINE)); } if (level != RIL_SYMBOL || cblob_it_ != NULL) box.rotate(it_->block()->block->re_rotation()); // Now we have a box in tesseract coordinates relative to the image rectangle, // we have to convert the coords to a top-down system. const int pix_height = pixGetHeight(tesseract_->pix_binary()); const int pix_width = pixGetWidth(tesseract_->pix_binary()); *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); *top = ClipToRange(pix_height - box.top(), 0, pix_height); *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); return true; }
/**********************************************************************
 * char_box_to_tbox
 *
 * Converts a character bounding box (x, y, width, height as returned by
 * boxGetGeometry) into a TBOX. The x coordinate is shifted by the left edge
 * of word_box less x_offset, where a nonzero x_offset accounts for any
 * additional padding of the word box. The y coordinate is flipped so that it
 * is measured upwards, with the top edge of word_box corresponding to y = 0
 * of the character box.
 **********************************************************************/
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
  l_int32 box_x;
  l_int32 box_y;
  l_int32 box_width;
  l_int32 box_height;
  boxGetGeometry(char_box, &box_x, &box_y, &box_width, &box_height);

  const l_int32 left = box_x + (word_box.left() - x_offset);
  const l_int32 right = left + box_width;
  const l_int32 top = word_box.bottom() + word_box.height() - box_y;
  const l_int32 bottom = top - box_height;
  return TBOX(left, bottom, right, top);
}
// Returns the bounding box including the desired combination of upper and // lower noise/diacritic elements. TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const { TBOX box = true_bounding_box(); int bottom = box.bottom(); int top = box.top(); // This is a read-only iteration of the rejected blobs. C_BLOB_IT it(const_cast<C_BLOB_LIST*>(&rej_cblobs)); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TBOX dot_box = it.data()->bounding_box(); if ((upper_dots || dot_box.bottom() <= top) && (lower_dots || dot_box.top() >= bottom)) { box += dot_box; } } return box; }
// Helper function to add 1 to a rectangle in source image coords to the // internal projection pix_. void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { int scaled_left = ImageXToProjectionX(box.left()); int scaled_top = ImageYToProjectionY(box.top()); int scaled_right = ImageXToProjectionX(box.right()); int scaled_bottom = ImageYToProjectionY(box.bottom()); int wpl = pixGetWpl(pix_); uint32_t* data = pixGetData(pix_) + scaled_top * wpl; for (int y = scaled_top; y <= scaled_bottom; ++y) { for (int x = scaled_left; x <= scaled_right; ++x) { int pixel = GET_DATA_BYTE(data, x); if (pixel < 255) SET_DATA_BYTE(data, x, pixel + 1); } data += wpl; } }
// Returns true if more than half the area of the rect is covered by grid // cells that are over the threshold. bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); int total_area = 0; for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { int value = GridCellValue(x, y); if (value > threshold) { TBOX cell_box(x * gridsize_, y * gridsize_, (x + 1) * gridsize_, (y + 1) * gridsize_); cell_box &= rect; // This is in-place box intersection. total_area += cell_box.area(); } } } return total_area * 2 > rect.area(); }
// Display the tab codes of the BLOBNBOXes in this grid. ScrollView* AlignedBlob::DisplayTabs(const char* window_name, ScrollView* tab_win) { #ifndef GRAPHICS_DISABLED if (tab_win == NULL) tab_win = MakeWindow(0, 50, window_name); // For every tab in the grid, display it. GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { TBOX box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); int bottom_y = box.bottom(); TabType tabtype = bbox->left_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_UNCONFIRMED) tab_win->Pen(ScrollView::BLUE); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::GREEN); else if (tabtype == TT_FAKE) tab_win->Pen(ScrollView::YELLOW); else tab_win->Pen(ScrollView::GREY); tab_win->Line(left_x, top_y, left_x, bottom_y); } tabtype = bbox->right_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_UNCONFIRMED) tab_win->Pen(ScrollView::MAGENTA); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::RED); else if (tabtype == TT_FAKE) tab_win->Pen(ScrollView::ORANGE); else tab_win->Pen(ScrollView::GREY); tab_win->Line(right_x, top_y, right_x, bottom_y); } } tab_win->Update(); #endif return tab_win; }
/** * Given a recognized blob, see if a contiguous collection of sub-pieces * (chopped blobs) starting at its left might qualify as being a subscript * or superscript letter based only on y position. Also do this for the * right side. */ void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers) { ScriptPos sp_unused1, sp_unused2; int unused1, unused2; if (!leading_pos) leading_pos = &sp_unused1; if (!num_leading_outliers) num_leading_outliers = &unused1; if (!trailing_pos) trailing_pos = &sp_unused2; if (!num_trailing_outliers) num_trailing_outliers = &unused2; *num_leading_outliers = *num_trailing_outliers = 0; *leading_pos = *trailing_pos = SP_NORMAL; int chopped_start = LeadingUnicharsToChopped(word, rebuilt_blob_index); int num_chopped_pieces = word->best_state[rebuilt_blob_index]; ScriptPos last_pos = SP_NORMAL; int trailing_outliers = 0; for (int i = 0; i < num_chopped_pieces; i++) { TBOX box = word->chopped_word->blobs[chopped_start + i]->bounding_box(); ScriptPos pos = SP_NORMAL; if (box.bottom() >= super_y_bottom) { pos = SP_SUPERSCRIPT; } else if (box.top() <= sub_y_top) { pos = SP_SUBSCRIPT; } if (pos == SP_NORMAL) { if (trailing_outliers == i) { *num_leading_outliers = trailing_outliers; *leading_pos = last_pos; } trailing_outliers = 0; } else { if (pos == last_pos) { trailing_outliers++; } else { trailing_outliers = 1; } } last_pos = pos; } *num_trailing_outliers = trailing_outliers; *trailing_pos = last_pos; }
// Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, int* end_y) { int ptcount = 0; BLOBNBOX_C_IT it(good_points); TBOX box = bbox->bounding_box(); bool debug = WithinTestRegion(2, box.left(), box.bottom()); if (debug) { tprintf("Starting alignment run at blob:"); box.print(); } int x_start = params.right_tab ? box.right() : box.left(); while (bbox != nullptr) { // Add the blob to the list if the appropriate side is a tab candidate, // or if we are working on a ragged tab. TabType type = params.right_tab ? bbox->right_tab_type() : bbox->left_tab_type(); if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && (it.empty() || it.data() != bbox)) { if (top_to_bottom) it.add_before_then_move(bbox); else it.add_after_then_move(bbox); ++ptcount; } // Find the next blob that is aligned with the current one. // FindAlignedBlob guarantees that forward progress will be made in the // top_to_bottom direction, and therefore eventually it will return nullptr, // making this while (bbox != nullptr) loop safe. bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); if (bbox != nullptr) { box = bbox->bounding_box(); if (!params.ragged) x_start = params.right_tab ? box.right() : box.left(); } } if (debug) { tprintf("Alignment run ended with %d pts at blob:", ptcount); box.print(); } return ptcount; }
// Displays the segmentation state of *this (if not the same as the last // one displayed) and waits for a click in the window. void WERD_CHOICE::DisplaySegmentation(TWERD* word) { #ifndef GRAPHICS_DISABLED // Number of different colors to draw with. const int kNumColors = 6; static ScrollView *segm_window = NULL; // Check the state against the static prev_drawn_state. static GenericVector<int> prev_drawn_state; bool already_done = prev_drawn_state.size() == length_; if (!already_done) prev_drawn_state.init_to_size(length_, 0); for (int i = 0; i < length_; ++i) { if (prev_drawn_state[i] != state_[i]) { already_done = false; } prev_drawn_state[i] = state_[i]; } if (already_done || word->blobs.empty()) return; // Create the window if needed. if (segm_window == NULL) { segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true); } else { segm_window->Clear(); } TBOX bbox; int blob_index = 0; for (int c = 0; c < length_; ++c) { ScrollView::Color color = static_cast<ScrollView::Color>(c % kNumColors + 3); for (int i = 0; i < state_[c]; ++i, ++blob_index) { TBLOB* blob = word->blobs[blob_index]; bbox += blob->bounding_box(); blob->plot(segm_window, color, color); } } segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom()); segm_window->Update(); window_wait(segm_window); #endif }