// Sets up the DENORM to execute a non-linear transformation based on // preserving an even distribution of stroke edges. The transformation // operates only within the given box. // x_coords is a collection of the x-coords of vertical edges for each // y-coord starting at box.bottom(). // y_coords is a collection of the y-coords of horizontal edges for each // x-coord starting at box.left(). // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. // The second-level vectors must all be sorted in ascending order. // See comments on the helper functions above for more details. void DENORM::SetupNonLinear( const DENORM* predecessor, const TBOX& box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector<GenericVector<int> >& x_coords, const GenericVector<GenericVector<int> >& y_coords) { Clear(); predecessor_ = predecessor; // x_map_ and y_map_ store a mapping from input x and y coordinate to output // x and y coordinate, based on scaling to the supplied target_width and // target_height. x_map_ = new GenericVector<float>; y_map_ = new GenericVector<float>; // Set a 2-d image array to the run lengths at each pixel. int width = box.width(); int height = box.height(); GENERIC_2D_ARRAY<int> minruns(width, height, 0); ComputeRunlengthImage(box, x_coords, y_coords, &minruns); // Edge density is the sum of the inverses of the run lengths. Compute // edge density projection profiles. ComputeEdgeDensityProfiles(box, minruns, x_map_, y_map_); // Convert the edge density profiles to the coordinates by multiplying by // the desired size and accumulating. 
(*x_map_)[width] = target_width; for (int x = width - 1; x >= 0; --x) { (*x_map_)[x] = (*x_map_)[x + 1] - (*x_map_)[x] * target_width; } (*y_map_)[height] = target_height; for (int y = height - 1; y >= 0; --y) { (*y_map_)[y] = (*y_map_)[y + 1] - (*y_map_)[y] * target_height; } x_origin_ = box.left(); y_origin_ = box.bottom(); final_xshift_ = final_xshift; final_yshift_ = final_yshift; }
// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, // but uses the median top/bottom for horizontal and median left/right for // vertical instead of the bounding box edges. // Evaluates for both horizontal and vertical and returns the best result, // with a positive value for horizontal and a negative value for vertical. int TextlineProjection::EvaluateColPartition(const ColPartition& part, const DENORM* denorm, bool debug) const { if (part.IsSingleton()) return EvaluateBox(part.bounding_box(), denorm, debug); // Test vertical orientation. TBOX box = part.bounding_box(); // Use the partition median for left/right. box.set_left(part.median_left()); box.set_right(part.median_right()); int vresult = EvaluateBox(box, denorm, debug); // Test horizontal orientation. box = part.bounding_box(); // Use the partition median for top/bottom. box.set_top(part.median_top()); box.set_bottom(part.median_bottom()); int hresult = EvaluateBox(box, denorm, debug); if (debug) { tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult); part.bounding_box().print(); part.Print(); } return hresult >= -vresult ? hresult : vresult; }
// Swaps the outlines of *this and next if needed to keep the centers in // increasing x. void TBLOB::CorrectBlobOrder(TBLOB* next) { TBOX box = bounding_box(); TBOX next_box = next->bounding_box(); if (box.x_middle() > next_box.x_middle()) { Swap(&outlines, &next->outlines); } }
// Debug helper: prints the bounding box of the given blob together with its
// horizontal stroke width, its vertical stroke width, and a perimeter-based
// width computed as 2 * area / perimeter of its C_BLOB.
// The perimeter-based width is reported as 0 when the blob has no C_BLOB or
// the C_BLOB has a zero perimeter, so it is safe to pass neighbours whose
// cblob() has not been checked by the caller.
static void PrintBoxWidths(BLOBNBOX* neighbour) {
  const TBOX& nbox = neighbour->bounding_box();
  double perimeter_width = 0.0;
  if (neighbour->cblob() != NULL) {
    const double perimeter = neighbour->cblob()->perimeter();
    if (perimeter != 0.0)
      perimeter_width = 2.0 * neighbour->cblob()->area() / perimeter;
  }
  // All three widths are printed with one decimal place. (The p-width
  // conversion used to be "%1.f", i.e. width 1 and precision 0, which dropped
  // the fractional part.)
  tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
          nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
          neighbour->horz_stroke_width(), neighbour->vert_stroke_width(),
          perimeter_width);
}
void OL_BUCKETS::extract_children( // recursive count C_OUTLINE *outline, // parent outline C_OUTLINE_IT *it // destination iterator ) { inT16 xmin, xmax; // coord limits inT16 ymin, ymax; inT16 xindex, yindex; // current bucket TBOX olbox; C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); xmin =(olbox.left() - bl.x()) / BUCKETSIZE; xmax =(olbox.right() - bl.x()) / BUCKETSIZE; ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; ymax =(olbox.top() - bl.y()) / BUCKETSIZE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { if (*child_it.data() < *outline) { it->add_after_then_move(child_it.extract()); } } } } }
// Converts the run-length image (see above to the edge density profiles used // for scaling, thus: // ______________ // |7 1_1_1_1_1 7| = 5.28 // |1|5 5 1 5 5|1| = 3.8 // |1|2 2|1|2 2|1| = 5 // |1|2 2|1|2 2|1| = 5 // |1|2 2|1|2 2|1| = 5 // |1|2 2|1|2 2|1| = 5 // |1|5_5_1_5_5|1| = 3.8 // |7_1_1_1_1_1_7| = 5.28 // 6 4 4 8 4 4 6 // . . . . . . . // 2 4 4 0 4 4 2 // 8 8 // Each profile is the sum of the reciprocals of the pixels in the image in // the appropriate row or column, and these are then normalized to sum to 1. // On output hx, hy contain an extra element, which will eventually be used // to guarantee that the top/right edge of the box (and anything beyond) always // gets mapped to the maximum target coordinate. static void ComputeEdgeDensityProfiles(const TBOX& box, const GENERIC_2D_ARRAY<int>& minruns, GenericVector<float>* hx, GenericVector<float>* hy) { int width = box.width(); int height = box.height(); hx->init_to_size(width + 1, 0.0); hy->init_to_size(height + 1, 0.0); double total = 0.0; for (int iy = 0; iy < height; ++iy) { for (int ix = 0; ix < width; ++ix) { int run = minruns(ix, iy); if (run == 0) run = 1; float density = 1.0f / run; (*hx)[ix] += density; (*hy)[iy] += density; } total += (*hy)[iy]; } // Normalize each profile to sum to 1. if (total > 0.0) { for (int ix = 0; ix < width; ++ix) { (*hx)[ix] /= total; } for (int iy = 0; iy < height; ++iy) { (*hy)[iy] /= total; } } // There is an extra element in each array, so initialize to 1. (*hx)[width] = 1.0f; (*hy)[height] = 1.0f; }
/**********************************************************************
 * render_segmentation
 *
 * Draws the outlines of every chunk in the list, changing color each
 * time a new character starts according to the supplied segmentation,
 * and then zooms the window to the union of the chunk bounding boxes.
 * segmentation[0] is compared against the character index, and
 * segmentation[char_num + 1] is loaded as the countdown of further
 * chunks for character char_num; once the character index reaches
 * segmentation[0], all remaining chunks share one color.
 **********************************************************************/
void render_segmentation(ScrollView *window,
                         TBLOB *chunks,
                         SEARCH_STATE segmentation) {
  C_COL color = Black;
  int char_num = -1;
  int chunks_left = 0;

  TBOX bbox;
  if (chunks) bbox = chunks->bounding_box();

  TBLOB *blob = chunks;
  while (blob != NULL) {
    bbox += blob->bounding_box();
    // The countdown is decremented on every chunk; a new character starts
    // on the chunk that found it at zero.
    const bool starts_new_char = (chunks_left == 0);
    --chunks_left;
    if (starts_new_char) {
      ++char_num;
      color = color_list[char_num % NUM_COLORS];
      if (char_num < segmentation[0]) {
        chunks_left = segmentation[char_num + 1];
      } else {
        chunks_left = MAX_INT32;
      }
    }
    render_outline(window, blob->outlines, color);
    blob = blob->next;
  }
  window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom());
}
// Returns an Imagedata containing the image of the given box, // and ground truth boxes/truth text if available in the input. // The image is not normalized in any way. ImageData* Tesseract::GetLineData(const TBOX& line_box, const GenericVector<TBOX>& boxes, const GenericVector<STRING>& texts, int start_box, int end_box, const BLOCK& block) { TBOX revised_box; ImageData* image_data = GetRectImage(line_box, block, kImagePadding, &revised_box); if (image_data == NULL) return NULL; image_data->set_page_number(applybox_page); // Copy the boxes and shift them so they are relative to the image. FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y()); ICOORD shift = -revised_box.botleft(); GenericVector<TBOX> line_boxes; GenericVector<STRING> line_texts; for (int b = start_box; b < end_box; ++b) { TBOX box = boxes[b]; box.rotate(block_rotation); box.move(shift); line_boxes.push_back(box); line_texts.push_back(texts[b]); } GenericVector<int> page_numbers; page_numbers.init_to_size(line_boxes.size(), applybox_page); image_data->AddBoxes(line_boxes, line_texts, page_numbers); return image_data; }
// Debug dump of a word for the fixspace code. Always prints the bounding
// box of the word. Unless SECURE_NAMES is defined it also prints the best
// choice string, the blob counts of the word and the outword, the reject
// map, and the tess_accepted and done flags. The per-character reject map
// detail is compiled in but switched off by a local flag.
void fixspace_dbg(WERD_RES *word) {
  const TBOX word_box = word->word->bounding_box();
  word_box.print();
#ifndef SECURE_NAMES
  const BOOL8 show_map_detail = FALSE;  // Flip to dump each reject map entry.

  tprintf(" \"%s\" ", word->best_choice->string().string());
  tprintf("Blob count: %d (word); %d/%d (outword)\n",
          word->word->gblob_list()->length(),
          word->outword->gblob_list()->length(),
          word->outword->rej_blob_list()->length());
  word->reject_map.print(debug_fp);
  tprintf("\n");
  if (show_map_detail) {
    tprintf("\"%s\"\n", word->best_choice->string().string());
    inT16 index = 0;
    while (word->best_choice->string()[index] != '\0') {
      tprintf("**** \"%c\" ****\n", word->best_choice->string()[index]);
      word->reject_map[index].full_print(debug_fp);
      index++;
    }
  }

  const char *accepted_text = word->tess_accepted ? "TRUE" : "FALSE";
  const char *done_text = word->done ? "TRUE" : "FALSE";
  tprintf("Tess Accepted: %s\n", accepted_text);
  tprintf("Done flag: %s\n\n", done_text);
#endif
}
// Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, int* end_y) { int ptcount = 0; BLOBNBOX_C_IT it(good_points); TBOX box = bbox->bounding_box(); int x_start = params.right_tab ? box.right() : box.left(); while (bbox != NULL) { // Add the blob to the list if the appropriate side is a tab candidate, // or if we are working on a ragged tab. if (((params.right_tab && bbox->right_tab_type() != TT_NONE) || (!params.right_tab && bbox->left_tab_type() != TT_NONE) || params.ragged) && (it.empty() || it.data() != bbox)) { if (top_to_bottom) it.add_before_then_move(bbox); else it.add_after_then_move(bbox); ++ptcount; } // Find the next blob that is aligned with the current one. // FindAlignedBlob guarantees that forward progress will be made in the // top_to_bottom direction, and therefore eventually it will return NULL, // making this while (bbox != NULL) loop safe. bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); if (bbox != NULL) { box = bbox->bounding_box(); if (!params.ragged) x_start = params.right_tab ? box.right() : box.left(); } } return ptcount; }
/** Handles a click event in a display window. */ void StrokeWidth::HandleClick(int x, int y) { BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>::HandleClick(x, y); // Run a radial search for blobs that overlap. BlobGridSearch radsearch(this); radsearch.StartRadSearch(x, y, 1); BLOBNBOX* neighbour; FCOORD click(static_cast<float>(x), static_cast<float>(y)); while ((neighbour = radsearch.NextRadSearch()) != NULL) { TBOX nbox = neighbour->bounding_box(); if (nbox.contains(click) && neighbour->cblob() != NULL) { PrintBoxWidths(neighbour); if (neighbour->neighbour(BND_LEFT) != NULL) PrintBoxWidths(neighbour->neighbour(BND_LEFT)); if (neighbour->neighbour(BND_RIGHT) != NULL) PrintBoxWidths(neighbour->neighbour(BND_RIGHT)); if (neighbour->neighbour(BND_ABOVE) != NULL) PrintBoxWidths(neighbour->neighbour(BND_ABOVE)); if (neighbour->neighbour(BND_BELOW) != NULL) PrintBoxWidths(neighbour->neighbour(BND_BELOW)); int gaps[BND_COUNT]; neighbour->NeighbourGaps(gaps); tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" "Good= %d %d %d %d\n", gaps[BND_LEFT], gaps[BND_RIGHT], gaps[BND_ABOVE], gaps[BND_BELOW], neighbour->horz_possible(), neighbour->vert_possible(), neighbour->good_stroke_neighbour(BND_LEFT), neighbour->good_stroke_neighbour(BND_RIGHT), neighbour->good_stroke_neighbour(BND_ABOVE), neighbour->good_stroke_neighbour(BND_BELOW)); break; } } }
// Generates a TrainingSample from a TBLOB. Extracts features and sets // the bounding box, so classifiers that operate on the image can work. // TODO(rays) Make BlobToTrainingSample a member of Classify now that // the FlexFx and FeatureDescription code have been removed and LearnBlob // is now a member of Classify. TrainingSample* BlobToTrainingSample( const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, GenericVector<INT_FEATURE_STRUCT>* bl_features) { GenericVector<INT_FEATURE_STRUCT> cn_features; Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, fx_info, nullptr); // TODO(rays) Use blob->PreciseBoundingBox() instead. TBOX box = blob.bounding_box(); TrainingSample* sample = nullptr; int num_features = fx_info->NumCN; if (num_features > 0) { sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], num_features); } if (sample != nullptr) { // Set the bounding box (in original image coordinates) in the sample. TPOINT topleft, botright; topleft.x = box.left(); topleft.y = box.top(); botright.x = box.right(); botright.y = box.bottom(); TPOINT original_topleft, original_botright; blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); blob.denorm().DenormTransform(nullptr, botright, &original_botright); sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y)); } return sample; }
// Computes the DENORMS for bl(baseline) and cn(character) normalization // during feature extraction. The input denorm describes the current state // of the blob, which is usually a baseline-normalized word. // The Transforms setup are as follows: // Baseline Normalized (bl) Output: // We center the grapheme by aligning the x-coordinate of its centroid with // x=128 and leaving the already-baseline-normalized y as-is. // // Character Normalized (cn) Output: // We align the grapheme's centroid at the origin and scale it // asymmetrically in x and y so that the 2nd moments are a standard value // (51.2) ie the result is vaguely square. // If classify_nonlinear_norm is true: // A non-linear normalization is setup that attempts to evenly distribute // edges across x and y. // // Some of the fields of fx_info are also setup: // Length: Total length of outline. // Rx: Rounded y second moment. (Reversed by convention.) // Ry: rounded x second moment. // Xmean: Rounded x center of mass of the blob. // Ymean: Rounded y center of mass of the blob. void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, DENORM* bl_denorm, DENORM* cn_denorm, INT_FX_RESULT_STRUCT* fx_info) { // Compute 1st and 2nd moments of the original outline. FCOORD center, second_moments; int length = blob.ComputeMoments(¢er, &second_moments); if (fx_info != nullptr) { fx_info->Length = length; fx_info->Rx = IntCastRounded(second_moments.y()); fx_info->Ry = IntCastRounded(second_moments.x()); fx_info->Xmean = IntCastRounded(center.x()); fx_info->Ymean = IntCastRounded(center.y()); } // Setup the denorm for Baseline normalization. bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f, 1.0f, 1.0f, 128.0f, 128.0f); // Setup the denorm for character normalization. 
if (nonlinear_norm) { GenericVector<GenericVector<int> > x_coords; GenericVector<GenericVector<int> > y_coords; TBOX box; blob.GetPreciseBoundingBox(&box); box.pad(1, 1); blob.GetEdgeCoords(box, &x_coords, &y_coords); cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, 0.0f, 0.0f, x_coords, y_coords); } else { cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), center.y(), 51.2f / second_moments.x(), 51.2f / second_moments.y(), 128.0f, 128.0f); } }
// Extract the OCR results, costs (penalty points for uncertainty), // and the bounding boxes of the characters. static void extract_result(ELIST_ITERATOR *out, PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); int word_count = 0; while (page_res_it.word() != NULL) { WERD_RES *word = page_res_it.word(); const char *str = word->best_choice->string().string(); const char *len = word->best_choice->lengths().string(); if (word_count) add_space(out); TBOX bln_rect; PBLOB_LIST *blobs = word->outword->blob_list(); PBLOB_IT it(blobs); int n = strlen(len); TBOX** boxes_to_fix = new TBOX*[n]; for (int i = 0; i < n; i++) { PBLOB *blob = it.data(); TBOX current = pblob_get_bbox(blob); bln_rect.bounding_union(current); TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), str, *len); tc->box = current; boxes_to_fix[i] = &tc->box; out->add_after_then_move(tc); it.forward(); str += *len; len++; } // Find the word bbox before normalization. // Here we can't use the C_BLOB bboxes directly, // since connected letters are not yet cut. TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); // Denormalize boxes by transforming the bbox of the whole bln word // into the denorm bbox (`real_rect') of the whole word. double x_stretch = double(real_rect.width()) / bln_rect.width(); double y_stretch = double(real_rect.height()) / bln_rect.height(); for (int j = 0; j < n; j++) { TBOX *box = boxes_to_fix[j]; int x0 = int(real_rect.left() + x_stretch * (box->left() - bln_rect.left()) + 0.5); int x1 = int(real_rect.left() + x_stretch * (box->right() - bln_rect.left()) + 0.5); int y0 = int(real_rect.bottom() + y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); int y1 = int(real_rect.bottom() + y_stretch * (box->top() - bln_rect.bottom()) + 0.5); *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); } delete [] boxes_to_fix; page_res_it.forward(); word_count++; } }
/// Helper to compute the dispute resolution metric. /// Disputed blob resolution. The aim is to give the blob to the most /// appropriate boxfile box. Most of the time it is obvious, but if /// two boxfile boxes overlap significantly it is not. If a small boxfile /// box takes most of the blob, and a large boxfile box does too, then /// we want the small boxfile box to get it, but if the small box /// is much smaller than the blob, we don't want it to get it. /// Details of the disputed blob resolution: /// Given a box with area A, and a blob with area B, with overlap area C, /// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum /// miss metric gets the blob. static double BoxMissMetric(const TBOX& box1, const TBOX& box2) { int overlap_area = box1.intersection(box2).area(); double miss_metric = box1.area()- overlap_area; miss_metric /= box1.area(); miss_metric *= box2.area() - overlap_area; miss_metric /= box2.area(); return miss_metric; }
// Build a fake outline, given just a bounding box and append to the list. void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) { C_OUTLINE_IT ol_it(outlines); // Make a C_OUTLINE from the bounds. This is a bit of a hack, // as there is no outline, just a bounding box, but it works nicely. CRACKEDGE start; start.pos = box.topleft(); C_OUTLINE* outline = new C_OUTLINE(&start, box.topleft(), box.botright(), 0); ol_it.add_to_end(outline); }
// Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* TabVector::VerticalTextlinePartner() { if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } if (num_unmatched + num_matched == 0) return NULL; double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, 
num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
void make_illegal_segment( //find segmentation FPSEGPT_LIST *prev_list, //previous segments TBOX blob_box, //bounding box BLOBNBOX_IT blob_it, //iterator int16_t region_index, //number of segment int16_t pitch, //pitch estimate int16_t pitch_error, //tolerance FPSEGPT_LIST *seg_list //output list ) { int16_t x; //current coord int16_t min_x = 0; //in this region int16_t max_x = 0; int16_t offset; //dist to edge FPSEGPT *segpt; //segment point FPSEGPT *prevpt; //previous point float best_cost; //best path FPSEGPT_IT segpt_it = seg_list;//iterator //previous points FPSEGPT_IT prevpt_it = prev_list; best_cost = FLT_MAX; for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list (); prevpt_it.forward ()) { prevpt = prevpt_it.data (); if (prevpt->cost_function () < best_cost) { //find least best_cost = prevpt->cost_function (); min_x = prevpt->position (); max_x = min_x; //limits on coords } else if (prevpt->cost_function () == best_cost) { max_x = prevpt->position (); } } min_x += pitch - pitch_error; max_x += pitch + pitch_error; for (x = min_x; x <= max_x; x++) { while (x > blob_box.right ()) { blob_box = box_next (&blob_it); } offset = x - blob_box.left (); if (blob_box.right () - x < offset) offset = blob_box.right () - x; segpt = new FPSEGPT (x, FALSE, offset, region_index, pitch, pitch_error, prev_list); if (segpt->previous () != nullptr) { ASSERT_HOST (offset >= 0); fprintf (stderr, "made fake at %d\n", x); //make one up segpt_it.add_after_then_move (segpt); segpt->faked = TRUE; segpt->fake_count++; } else delete segpt; } }
// Creates a box file string from a unichar string, TBOX and page number. void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, STRING* box_str) { *box_str = unichar_str; box_str->add_str_int(" ", box.left()); box_str->add_str_int(" ", box.bottom()); box_str->add_str_int(" ", box.right()); box_str->add_str_int(" ", box.top()); box_str->add_str_int(" ", page_num); }
// TODO(mezhirov) delete this function and replace with word->bounding_box() static TBOX c_blob_list_get_bbox(C_BLOB_LIST *cblobs) { TBOX result; C_BLOB_IT c_it(cblobs); for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { C_BLOB *blob = c_it.data(); //bboxes.push(tessy_rectangle(blob->bounding_box())); result.bounding_union(blob->bounding_box()); } return result; }
// Helper gets the image of a rectangle, using the block.re_rotation() if // needed to get to the image, and rotating the result back to horizontal // layout. (CJK characters will be on their left sides) The vertical text flag // is set in the returned ImageData if the text was originally vertical, which // can be used to invoke a different CJK recognition engine. The revised_box // is also returned to enable calculation of output bounding boxes. ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block, int padding, TBOX* revised_box) const { TBOX wbox = box; wbox.pad(padding, padding); *revised_box = wbox; // Number of clockwise 90 degree rotations needed to get back to tesseract // coords from the clipped image. int num_rotations = 0; if (block.re_rotation().y() > 0.0f) num_rotations = 1; else if (block.re_rotation().x() < 0.0f) num_rotations = 2; else if (block.re_rotation().y() < 0.0f) num_rotations = 3; // Handle two cases automatically: 1 the box came from the block, 2 the box // came from a box file, and refers to the image, which the block may not. if (block.bounding_box().major_overlap(*revised_box)) revised_box->rotate(block.re_rotation()); // Now revised_box always refers to the image. // BestPix is never colormapped, but may be of any depth. Pix* pix = BestPix(); int width = pixGetWidth(pix); int height = pixGetHeight(pix); TBOX image_box(0, 0, width, height); // Clip to image bounds; *revised_box &= image_box; if (revised_box->null_box()) return NULL; Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(), revised_box->height()); Pix* box_pix = pixClipRectangle(pix, clip_box, NULL); if (box_pix == NULL) return NULL; boxDestroy(&clip_box); if (num_rotations > 0) { Pix* rot_pix = pixRotateOrth(box_pix, num_rotations); pixDestroy(&box_pix); box_pix = rot_pix; } // Convert sub-8-bit images to 8 bit. 
int depth = pixGetDepth(box_pix); if (depth < 8) { Pix* grey; grey = pixConvertTo8(box_pix, false); pixDestroy(&box_pix); box_pix = grey; } bool vertical_text = false; if (num_rotations > 0) { // Rotated the clipped revised box back to internal coordinates. FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y()); revised_box->rotate(rotation); if (num_rotations != 2) vertical_text = true; } return new ImageData(vertical_text, box_pix); }
// brief Get a bounding box of a PBLOB. // TODO(mezhirov) delete this function and replace with blob->bounding_box() static TBOX pblob_get_bbox(PBLOB *blob) { OUTLINE_LIST *outlines = blob->out_list(); OUTLINE_IT it(outlines); TBOX result; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { OUTLINE *outline = it.data(); outline->compute_bb(); result.bounding_union(outline->bounding_box()); } return result; }
// Tests each blob in the list to see if it is certain non-text using 2 // conditions: // 1. blob overlaps a cell with high value in noise_density_ (previously set // by ComputeNoiseDensity). // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This // condition is disabled with max_blob_overlaps == -1. // If it does, the blob is declared non-text, and is used to mark up the // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their // neighbours reset, as they may now point to deleted data. // WARNING: The blobs list blobs may be in the *this grid, but they are // not removed. If any deleted blobs might be in *this, then this must be // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not NULL, deleted blobs are drawn on it in red, and kept // blobs are drawn on it in ok_color. void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, int max_blob_overlaps, ScrollView* win, ScrollView::Color ok_color, Pix* nontext_mask) { int imageheight = tright().y() - bleft().x(); BLOBNBOX_IT blob_it(blobs); BLOBNBOX_LIST dead_blobs; BLOBNBOX_IT dead_it(&dead_blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX box = blob->bounding_box(); if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { blob->ClearNeighbours(); #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ok_color, ok_color); #endif // GRAPHICS_DISABLED } else { if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. 
Pix* blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, 0); pixDestroy(&blob_pix); } else { if (box.area() < gridsize() * gridsize()) { // It is a really bad idea to make lots of small components in the // photo mask, so try to join it to a bigger area by expanding the // box in a way that does not touch any zero noise density cell. box = AttemptBoxExpansion(box, *noise_density_, gridsize()); } // All overlapped cells are non-zero, so just mark the rectangle. pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SET, NULL, 0, 0); } #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ScrollView::RED, ScrollView::RED); #endif // GRAPHICS_DISABLED // It is safe to delete the cblob now, as it isn't used by the grid // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the // dead_blobs list. // TODO(rays) delete the delete when the BLOBNBOX destructor deletes // the cblob. delete blob->cblob(); dead_it.add_to_end(blob_it.extract()); } } }
// Adds edges to the given vectors. // For all the edge steps in all the outlines, or polygonal approximation // where there are no edge steps, collects the steps into x_coords/y_coords. // x_coords is a collection of the x-coords of vertical edges for each // y-coord starting at box.bottom(). // y_coords is a collection of the y-coords of horizontal edges for each // x-coord starting at box.left(). // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. void TBLOB::GetEdgeCoords(const TBOX& box, GenericVector<GenericVector<int> >* x_coords, GenericVector<GenericVector<int> >* y_coords) const { GenericVector<int> empty; x_coords->init_to_size(box.height(), empty); y_coords->init_to_size(box.width(), empty); CollectEdges(box, nullptr, nullptr, x_coords, y_coords); // Sort the output vectors. for (int i = 0; i < x_coords->size(); ++i) (*x_coords)[i].sort(); for (int i = 0; i < y_coords->size(); ++i) (*y_coords)[i].sort(); }
// Returns true if any cell value in the given rectangle is zero. bool IntGrid::AnyZeroInRect(const TBOX& rect) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { if (GridCellValue(x, y) == 0) return true; } } return false; }
// Setup for a baseline normalization. If there are segs, then they // are used, otherwise, if there is a row, that is used, otherwise the // bottom of the word_box is used for the baseline. void DENORM::SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height, const TBOX& word_box, int num_segs, const DENORM_SEG* segs) { float scale = kBlnXHeight / x_height; float x_origin = (word_box.left() + word_box.right()) / 2.0f; float y_origin = 0.0f; if (num_segs == 0 && row == NULL) { y_origin = word_box.bottom(); } SetupNormalization(block, row, NULL, NULL, segs, num_segs, x_origin, y_origin, scale, scale, 0.0f, static_cast<float>(kBlnBaselineOffset)); }
// Constructs a PBLOB from a compact blob by approximating its outlines.
// The height passed to approximate_outline_list is the larger of xheight
// (the height of the line) and the height of the blob's bounding box.
// If the C_BLOB has no outlines, nothing is added to this blob.
PBLOB::PBLOB(C_BLOB *cblob, float xheight) {
  if (cblob->out_list()->empty())
    return;
  const TBOX bbox = cblob->bounding_box();
  // Use the max of the line height and the blob height.
  if (bbox.height() > xheight)
    xheight = bbox.height();
  approximate_outline_list(cblob->out_list(), &outlines, xheight);
}
/**********************************************************************
 * char_box_to_tbox
 *
 * Create a TBOX from a character bounding box. The geometry of char_box
 * is read with boxGetGeometry; x is shifted by the left of word_box less
 * x_offset, and y is flipped so that it is measured upwards from the
 * bottom of word_box. If nonzero, the x_offset accounts for any
 * additional padding of the word box that should be taken into account.
 *
 **********************************************************************/
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
  l_int32 box_x;
  l_int32 box_y;
  l_int32 box_w;
  l_int32 box_h;
  boxGetGeometry(char_box, &box_x, &box_y, &box_w, &box_h);

  const l_int32 left = box_x + (word_box.left() - x_offset);
  const l_int32 right = left + box_w;
  // box_y is measured from the top of the word box.
  const l_int32 top = word_box.bottom() + word_box.height() - box_y;
  const l_int32 bottom = top - box_h;
  return TBOX(left, bottom, right, top);
}
// Returns the bounding box including the desired combination of upper and // lower noise/diacritic elements. TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const { TBOX box = true_bounding_box(); int bottom = box.bottom(); int top = box.top(); // This is a read-only iteration of the rejected blobs. C_BLOB_IT it(const_cast<C_BLOB_LIST*>(&rej_cblobs)); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TBOX dot_box = it.data()->bounding_box(); if ((upper_dots || dot_box.bottom() <= top) && (lower_dots || dot_box.top() >= bottom)) { box += dot_box; } } return box; }
// Helper for SetupNonLinear computes an image of shortest run-lengths from // the x/y edges provided. // Based on "A nonlinear normalization method for handprinted Kanji character // recognition -- line density equalization" by Hiromitsu Yamada et al. // Eg below is an O in a 1-pixel margin-ed bounding box and the corresponding // ______________ input x_coords and y_coords. // | _________ | <empty> // | | _ | | 1, 6 // | | | | | | 1, 3, 4, 6 // | | | | | | 1, 3, 4, 6 // | | | | | | 1, 3, 4, 6 // | | |_| | | 1, 3, 4, 6 // | |_________| | 1, 6 // |_____________| <empty> // E 1 1 1 1 1 E // m 7 7 2 7 7 m // p 6 p // t 7 t // y y // The output image contains the min of the x and y run-length (distance // between edges) at each coordinate in the image thus: // ______________ // |7 1_1_1_1_1 7| // |1|5 5 1 5 5|1| // |1|2 2|1|2 2|1| // |1|2 2|1|2 2|1| // |1|2 2|1|2 2|1| // |1|2 2|1|2 2|1| // |1|5_5_1_5_5|1| // |7_1_1_1_1_1_7| // Note that the input coords are all integer, so all partial pixels are dealt // with elsewhere. Although it is nice for outlines to be properly connected // and continuous, there is no requirement that they be as such, so they could // have been derived from a flaky source, such as greyscale. // This function works only within the provided box, and it is assumed that the // input x_coords and y_coords have already been translated to have the bottom- // left of box as the origin. Although an output, the minruns should have been // pre-initialized to be the same size as box. Each element will contain the // minimum of x and y run-length as shown above. static void ComputeRunlengthImage( const TBOX& box, const GenericVector<GenericVector<int> >& x_coords, const GenericVector<GenericVector<int> >& y_coords, GENERIC_2D_ARRAY<int>* minruns) { int width = box.width(); int height = box.height(); ASSERT_HOST(minruns->dim1() == width); ASSERT_HOST(minruns->dim2() == height); // Set a 2-d image array to the run lengths at each pixel. 
for (int ix = 0; ix < width; ++ix) { int y = 0; for (int i = 0; i < y_coords[ix].size(); ++i) { int y_edge = ClipToRange(y_coords[ix][i], 0, height); int gap = y_edge - y; // Every pixel between the last and current edge get set to the gap. while (y < y_edge) { (*minruns)(ix, y) = gap; ++y; } } // Pretend there is a bounding box of edges all around the image. int gap = height - y; while (y < height) { (*minruns)(ix, y) = gap; ++y; } } // Now set the image pixels the the MIN of the x and y runlengths. for (int iy = 0; iy < height; ++iy) { int x = 0; for (int i = 0; i < x_coords[iy].size(); ++i) { int x_edge = ClipToRange(x_coords[iy][i], 0, width); int gap = x_edge - x; while (x < x_edge) { if (gap < (*minruns)(x, iy)) (*minruns)(x, iy) = gap; ++x; } } int gap = width - x; while (x < width) { if (gap < (*minruns)(x, iy)) (*minruns)(x, iy) = gap; ++x; } } }