// Internal version of EvaluateBox returns the unclipped gradients as well // as the result of EvaluateBox. // hgrad1 and hgrad2 are the gradients for the horizontal textline. int TextlineProjection::EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, int* hgrad1, int* hgrad2, int* vgrad1, int* vgrad2) const { int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true); int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false); int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true); int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), box.bottom(), box.top(), false); int top_clipped = MAX(top_gradient, 0); int bottom_clipped = MAX(bottom_gradient, 0); int left_clipped = MAX(left_gradient, 0); int right_clipped = MAX(right_gradient, 0); if (debug) { tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", top_gradient, bottom_gradient, left_gradient, right_gradient); box.print(); } int result = MAX(top_clipped, bottom_clipped) - MAX(left_clipped, right_clipped); if (hgrad1 != NULL && hgrad2 != NULL) { *hgrad1 = top_gradient; *hgrad2 = bottom_gradient; } if (vgrad1 != NULL && vgrad2 != NULL) { *vgrad1 = left_gradient; *vgrad2 = right_gradient; } return result; }
/**********************************************************************
 * blobs_widths
 *
 * Compute the widths of a list of blobs.  Return an array of the widths
 * and gaps: widths[0] is the width of the first blob, and every later
 * blob contributes two entries, the gap from the previous blob's right
 * edge followed by its own width.  num_chars is set to the number of
 * blobs.
 *
 * A NULL blob list yields a valid record with num_chars == 0 and no
 * width entries written.  The record is obtained from memalloc.
 **********************************************************************/
WIDTH_RECORD *blobs_widths(TBLOB *blobs) {  /*blob to compute on */
  WIDTH_RECORD *width_record;

  if (blobs == NULL) {
    // Nothing to measure.  Still hand back a record large enough to hold
    // num_chars so that callers can read it safely; the original code
    // allocated zero bytes here and then dereferenced the NULL list.
    width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * 2);
    width_record->num_chars = 0;
    return width_record;
  }

  int num_blobs = count_blobs (blobs);
  // Allocation formula kept from the original: 2 * num_blobs ints.
  // NOTE(review): presumably covers num_chars plus the 2 * num_blobs - 1
  // width/gap entries - confirm against the WIDTH_RECORD declaration.
  width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
  width_record->num_chars = num_blobs;

  int i = 0;
  TBOX bbox = blobs->bounding_box();
  width_record->widths[i++] = bbox.width();  /* First width */
  int blob_end = bbox.right();

  for (TBLOB* blob = blobs->next; blob != NULL; blob = blob->next) {
    TBOX curbox = blob->bounding_box();
    width_record->widths[i++] = curbox.left() - blob_end;  /* gap */
    width_record->widths[i++] = curbox.width();            /* width */
    blob_end = curbox.right();
  }
  return width_record;
}
// Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, int* end_y) { int ptcount = 0; BLOBNBOX_C_IT it(good_points); TBOX box = bbox->bounding_box(); int x_start = params.right_tab ? box.right() : box.left(); while (bbox != NULL) { // Add the blob to the list if the appropriate side is a tab candidate, // or if we are working on a ragged tab. if (((params.right_tab && bbox->right_tab_type() != TT_NONE) || (!params.right_tab && bbox->left_tab_type() != TT_NONE) || params.ragged) && (it.empty() || it.data() != bbox)) { if (top_to_bottom) it.add_before_then_move(bbox); else it.add_after_then_move(bbox); ++ptcount; } // Find the next blob that is aligned with the current one. // FindAlignedBlob guarantees that forward progress will be made in the // top_to_bottom direction, and therefore eventually it will return NULL, // making this while (bbox != NULL) loop safe. bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); if (bbox != NULL) { box = bbox->bounding_box(); if (!params.ragged) x_start = params.right_tab ? box.right() : box.left(); } } return ptcount; }
// Compute the distance from the from_box to the to_box using curved // projection space. Separation that involves a decrease in projection // density (moving from the from_box to the to_box) is weighted more heavily // than constant density, and an increase is weighted less. // If horizontal_textline is true, then curved space is used vertically, // as for a diacritic on the edge of a textline. // The projection uses original image coords, so denorm is used to get // back to the image coords from box/part space. // How the calculation works: Think of a diacritic near a textline. // Distance is measured from the far side of the from_box to the near side of // the to_box. Shown is the horizontal textline case. // |------^-----| // | from | box | // |------|-----| // perpendicular | // <------v-------->|--------------------| // parallel | to box | // |--------------------| // Perpendicular distance uses "curved space" See VerticalDistance below. // Parallel distance is linear. // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio. int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, bool horizontal_textline, const DENORM* denorm, bool debug) const { // The parallel_gap is the horizontal gap between a horizontal textline and // the box. Analogous for vertical. int parallel_gap = 0; // start_pt is the box end of the line to be modified for curved space. TPOINT start_pt; // end_pt is the partition end of the line to be modified for curved space. 
TPOINT end_pt; if (horizontal_textline) { parallel_gap = from_box.x_gap(to_box) + from_box.width(); start_pt.x = (from_box.left() + from_box.right()) / 2; end_pt.x = start_pt.x; if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) { start_pt.y = from_box.top(); end_pt.y = MIN(to_box.top(), start_pt.y); } else { start_pt.y = from_box.bottom(); end_pt.y = MAX(to_box.bottom(), start_pt.y); } } else { parallel_gap = from_box.y_gap(to_box) + from_box.height(); if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) { start_pt.x = from_box.right(); end_pt.x = MIN(to_box.right(), start_pt.x); } else { start_pt.x = from_box.left(); end_pt.x = MAX(to_box.left(), start_pt.x); } start_pt.y = (from_box.bottom() + from_box.top()) / 2; end_pt.y = start_pt.y; } // The perpendicular gap is the max vertical distance gap out of: // top of from_box to to_box top and bottom of from_box to to_box bottom. // This value is then modified for curved projection space. // Analogous for vertical. int perpendicular_gap = 0; // If start_pt == end_pt, then the from_box lies entirely within the to_box // (in the perpendicular direction), so we don't need to calculate the // perpendicular_gap. if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { if (denorm != NULL) { // Denormalize the start and end. denorm->DenormTransform(NULL, start_pt, &start_pt); denorm->DenormTransform(NULL, end_pt, &end_pt); } if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y); } else { perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y); } } // The parallel_gap weighs less than the perpendicular_gap. return perpendicular_gap + parallel_gap / kParaPerpDistRatio; }
// Formats a TBOX as "Bounding box=(left,y0)->(right,y1)", where y0 is
// height - box.top() and y1 is height - box.bottom(). A trailing newline is
// appended when eol is true.
QString printTBOX(TBOX box,int height, bool eol) {
  // The two output variants differ only in the trailing newline.
  const char* pattern = eol ? "Bounding box=(%1,%2)->(%3,%4)\n"
                            : "Bounding box=(%1,%2)->(%3,%4)";
  return QString (pattern)
      .arg(box.left())
      .arg(height - box.top())
      .arg(box.right())
      .arg(height - box.bottom());
}
void make_illegal_segment( //find segmentation FPSEGPT_LIST *prev_list, //previous segments TBOX blob_box, //bounding box BLOBNBOX_IT blob_it, //iterator int16_t region_index, //number of segment int16_t pitch, //pitch estimate int16_t pitch_error, //tolerance FPSEGPT_LIST *seg_list //output list ) { int16_t x; //current coord int16_t min_x = 0; //in this region int16_t max_x = 0; int16_t offset; //dist to edge FPSEGPT *segpt; //segment point FPSEGPT *prevpt; //previous point float best_cost; //best path FPSEGPT_IT segpt_it = seg_list;//iterator //previous points FPSEGPT_IT prevpt_it = prev_list; best_cost = FLT_MAX; for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list (); prevpt_it.forward ()) { prevpt = prevpt_it.data (); if (prevpt->cost_function () < best_cost) { //find least best_cost = prevpt->cost_function (); min_x = prevpt->position (); max_x = min_x; //limits on coords } else if (prevpt->cost_function () == best_cost) { max_x = prevpt->position (); } } min_x += pitch - pitch_error; max_x += pitch + pitch_error; for (x = min_x; x <= max_x; x++) { while (x > blob_box.right ()) { blob_box = box_next (&blob_it); } offset = x - blob_box.left (); if (blob_box.right () - x < offset) offset = blob_box.right () - x; segpt = new FPSEGPT (x, FALSE, offset, region_index, pitch, pitch_error, prev_list); if (segpt->previous () != nullptr) { ASSERT_HOST (offset >= 0); fprintf (stderr, "made fake at %d\n", x); //make one up segpt_it.add_after_then_move (segpt); segpt->faked = TRUE; segpt->fake_count++; } else delete segpt; } }
bool PIXROW::bad_box( //return true if box exceeds image int xsize, int ysize) const { TBOX bbox = bounding_box (); if (bbox.left () < 0 || bbox.right () > xsize || bbox.top () > ysize || bbox.bottom () < 0) { tprintf("Box (%d,%d)->(%d,%d) bad compared to %d,%d\n", bbox.left(),bbox.bottom(), bbox.right(), bbox.top(), xsize, ysize); return true; } return false; }
// Debug helper: prints the bounding box of the blob together with three
// width figures, each to one decimal place:
//   h-width: neighbour->horz_stroke_width()
//   v-width: neighbour->vert_stroke_width()
//   p-width: 2 * area / perimeter of the blob's cblob.
static void PrintBoxWidths(BLOBNBOX* neighbour) {
  TBOX nbox = neighbour->bounding_box();
  // The p-width conversion used to be "%1.f" (field width 1, precision 0),
  // which dropped the decimal that the other two widths print. It is now
  // "%.1f" like its siblings.
  tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
          nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
          neighbour->horz_stroke_width(), neighbour->vert_stroke_width(),
          2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter());
}
// Inserts a list of blobs into the projection. // Rotation is a multiple of 90 degrees to get from blob coords to // nontext_map coords, nontext_map_box is the bounds of the nontext_map. // Blobs are spread horizontally or vertically according to their internal // flags, but the spreading is truncated by set pixels in the nontext_map // and also by the horizontal rule line limits on the blobs. void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation, const TBOX& nontext_map_box, Pix* nontext_map) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX bbox = blob->bounding_box(); ICOORD middle((bbox.left() + bbox.right()) / 2, (bbox.bottom() + bbox.top()) / 2); bool spreading_horizontally = PadBlobBox(blob, &bbox); // Rotate to match the nontext_map. bbox.rotate(rotation); middle.rotate(rotation); if (rotation.x() == 0.0f) spreading_horizontally = !spreading_horizontally; // Clip to the image before applying the increments. bbox &= nontext_map_box; // This is in-place box intersection. // Check for image pixels before spreading. TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, nontext_map, &bbox); if (bbox.area() > 0) { IncrementRectangle8Bit(bbox); } } }
// Generates a TrainingSample from a TBLOB. Extracts features and sets // the bounding box, so classifiers that operate on the image can work. // TODO(rays) Make BlobToTrainingSample a member of Classify now that // the FlexFx and FeatureDescription code have been removed and LearnBlob // is now a member of Classify. TrainingSample* BlobToTrainingSample( const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, GenericVector<INT_FEATURE_STRUCT>* bl_features) { GenericVector<INT_FEATURE_STRUCT> cn_features; Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, fx_info, nullptr); // TODO(rays) Use blob->PreciseBoundingBox() instead. TBOX box = blob.bounding_box(); TrainingSample* sample = nullptr; int num_features = fx_info->NumCN; if (num_features > 0) { sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], num_features); } if (sample != nullptr) { // Set the bounding box (in original image coordinates) in the sample. TPOINT topleft, botright; topleft.x = box.left(); topleft.y = box.top(); botright.x = box.right(); botright.y = box.bottom(); TPOINT original_topleft, original_botright; blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); blob.denorm().DenormTransform(nullptr, botright, &original_botright); sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y)); } return sample; }
void OL_BUCKETS::extract_children( // recursive count C_OUTLINE *outline, // parent outline C_OUTLINE_IT *it // destination iterator ) { inT16 xmin, xmax; // coord limits inT16 ymin, ymax; inT16 xindex, yindex; // current bucket TBOX olbox; C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); xmin =(olbox.left() - bl.x()) / BUCKETSIZE; xmax =(olbox.right() - bl.x()) / BUCKETSIZE; ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; ymax =(olbox.top() - bl.y()) / BUCKETSIZE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { if (*child_it.data() < *outline) { it->add_after_then_move(child_it.extract()); } } } } }
/**********************************************************************
 * render_segmentation
 *
 * Draw the outlines of the list of chunks, changing color each time a
 * new character of the supplied segmentation begins, then zoom the
 * window to the union of the chunk bounding boxes.
 *
 * segmentation[0] is compared against the character index, and
 * segmentation[k + 1] gives the number of chunks to draw in the same
 * color after the first chunk of character k.
 **********************************************************************/
void render_segmentation(ScrollView *window,
                         TBLOB *chunks,
                         SEARCH_STATE segmentation) {
  TBOX bbox;
  if (chunks) bbox = chunks->bounding_box();

  C_COL color = Black;
  int char_num = -1;
  int chunks_left = 0;
  for (TBLOB *blob = chunks; blob != NULL; blob = blob->next) {
    bbox += blob->bounding_box();
    if (chunks_left != 0) {
      // Still inside the current character.
      --chunks_left;
    } else {
      // First chunk of the next character: switch color and reload the
      // chunk counter.
      ++char_num;
      color = color_list[char_num % NUM_COLORS];
      if (char_num < segmentation[0]) {
        chunks_left = segmentation[char_num + 1];
      } else {
        // Past the end of the segmentation: keep the same color from here.
        chunks_left = MAX_INT32;
      }
    }
    render_outline(window, blob->outlines, color);
  }
  window->ZoomToRectangle(bbox.left(), bbox.top(),
                          bbox.right(), bbox.bottom());
}
// This function takes tif/box pair of files and runs recognition on the image, // while making sure that the word bounds that tesseract identified roughly // match to those specified by the input box file. For each word (ngram in a // single bounding box from the input box file) it outputs the ocred result, // the correct label, rating and certainty. void Tesseract::recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) { STRING box_fname = fname; const char *lastdot = strrchr(box_fname.string(), '.'); if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0'; box_fname += ".box"; // read_next_box() will close box_file FILE *box_file = open_file(box_fname.string(), "r"); PAGE_RES_IT page_res_it; page_res_it.page_res = page_res; page_res_it.restart_page(); char label[kBoxReadBufSize]; // Process all the words on this page. TBOX tbox; // tesseract-identified box TBOX bbox; // box from the box file bool keep_going; int line_number = 0; do { keep_going = read_t(&page_res_it, &tbox); keep_going &= read_b(applybox_page, &line_number, box_file, label, &bbox); // Align bottom left points of the TBOXes. while (keep_going && !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { keep_going = (bbox.bottom() < tbox.bottom()) ? read_t(&page_res_it, &tbox) : read_b(applybox_page, &line_number, box_file, label, &bbox); } while (keep_going && !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) { keep_going = (bbox.left() > tbox.left()) ? read_t(&page_res_it, &tbox) : read_b(applybox_page, &line_number, box_file, label, &bbox); } // OCR the word if top right points of the TBOXes are similar. if (keep_going && NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { ambigs_classify_and_output(page_res_it.prev_word(), page_res_it.prev_row(), page_res_it.prev_block(), label, output_file); } } while (keep_going); }
// Extract the OCR results, costs (penalty points for uncertainty), // and the bounding boxes of the characters. static void extract_result(ELIST_ITERATOR *out, PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); int word_count = 0; while (page_res_it.word() != NULL) { WERD_RES *word = page_res_it.word(); const char *str = word->best_choice->string().string(); const char *len = word->best_choice->lengths().string(); if (word_count) add_space(out); TBOX bln_rect; PBLOB_LIST *blobs = word->outword->blob_list(); PBLOB_IT it(blobs); int n = strlen(len); TBOX** boxes_to_fix = new TBOX*[n]; for (int i = 0; i < n; i++) { PBLOB *blob = it.data(); TBOX current = pblob_get_bbox(blob); bln_rect.bounding_union(current); TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), str, *len); tc->box = current; boxes_to_fix[i] = &tc->box; out->add_after_then_move(tc); it.forward(); str += *len; len++; } // Find the word bbox before normalization. // Here we can't use the C_BLOB bboxes directly, // since connected letters are not yet cut. TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); // Denormalize boxes by transforming the bbox of the whole bln word // into the denorm bbox (`real_rect') of the whole word. double x_stretch = double(real_rect.width()) / bln_rect.width(); double y_stretch = double(real_rect.height()) / bln_rect.height(); for (int j = 0; j < n; j++) { TBOX *box = boxes_to_fix[j]; int x0 = int(real_rect.left() + x_stretch * (box->left() - bln_rect.left()) + 0.5); int x1 = int(real_rect.left() + x_stretch * (box->right() - bln_rect.left()) + 0.5); int y0 = int(real_rect.bottom() + y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); int y1 = int(real_rect.bottom() + y_stretch * (box->top() - bln_rect.bottom()) + 0.5); *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); } delete [] boxes_to_fix; page_res_it.forward(); word_count++; } }
// Creates a box file string from a unichar string, TBOX and page number. void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, STRING* box_str) { *box_str = unichar_str; box_str->add_str_int(" ", box.left()); box_str->add_str_int(" ", box.bottom()); box_str->add_str_int(" ", box.right()); box_str->add_str_int(" ", box.top()); box_str->add_str_int(" ", page_num); }
// Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned // blobs and the number in the list. // For other parameters see FindAlignedBlob below. int AlignedBlob::AlignTabs(const AlignedBlobParams& params, bool top_to_bottom, BLOBNBOX* bbox, BLOBNBOX_CLIST* good_points, int* end_y) { int ptcount = 0; BLOBNBOX_C_IT it(good_points); TBOX box = bbox->bounding_box(); bool debug = WithinTestRegion(2, box.left(), box.bottom()); if (debug) { tprintf("Starting alignment run at blob:"); box.print(); } int x_start = params.right_tab ? box.right() : box.left(); while (bbox != nullptr) { // Add the blob to the list if the appropriate side is a tab candidate, // or if we are working on a ragged tab. TabType type = params.right_tab ? bbox->right_tab_type() : bbox->left_tab_type(); if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && (it.empty() || it.data() != bbox)) { if (top_to_bottom) it.add_before_then_move(bbox); else it.add_after_then_move(bbox); ++ptcount; } // Find the next blob that is aligned with the current one. // FindAlignedBlob guarantees that forward progress will be made in the // top_to_bottom direction, and therefore eventually it will return nullptr, // making this while (bbox != nullptr) loop safe. bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); if (bbox != nullptr) { box = bbox->bounding_box(); if (!params.ragged) x_start = params.right_tab ? box.right() : box.left(); } } if (debug) { tprintf("Alignment run ended with %d pts at blob:", ptcount); box.print(); } return ptcount; }
// Initialize from box coordinates. POLY_BLOCK::POLY_BLOCK(const TBOX& box, PolyBlockType t) { vertices.clear(); ICOORDELT_IT v = &vertices; v.move_to_first(); v.add_to_end(new ICOORDELT(box.left(), box.top())); v.add_to_end(new ICOORDELT(box.left(), box.bottom())); v.add_to_end(new ICOORDELT(box.right(), box.bottom())); v.add_to_end(new ICOORDELT(box.right(), box.top())); compute_bb(); type = t; }
// Normalizes the blob for classification only if needed. // (Normally this means a non-zero classify rotation.) // If no Normalization is needed, then NULL is returned, and the input blob // can be used directly. Otherwise a new TBLOB is returned which must be // deleted after use. TBLOB* TBLOB::ClassifyNormalizeIfNeeded() const { TBLOB* rotated_blob = NULL; // If necessary, copy the blob and rotate it. The rotation is always // +/- 90 degrees, as 180 was already taken care of. if (denorm_.block() != NULL && denorm_.block()->classify_rotation().y() != 0.0) { TBOX box = bounding_box(); int x_middle = (box.left() + box.right()) / 2; int y_middle = (box.top() + box.bottom()) / 2; rotated_blob = new TBLOB(*this); const FCOORD& rotation = denorm_.block()->classify_rotation(); // Move the rotated blob back to the same y-position so that we // can still distinguish similar glyphs with differeny y-position. float target_y = kBlnBaselineOffset + (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); rotated_blob->Normalize(NULL, &rotation, &denorm_, x_middle, y_middle, 1.0f, 1.0f, 0.0f, target_y, denorm_.inverse(), denorm_.pix()); } return rotated_blob; }
// Returns true if any cell value in the given rectangle is zero. bool IntGrid::AnyZeroInRect(const TBOX& rect) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { if (GridCellValue(x, y) == 0) return true; } } return false; }
/**
 * @name start_seam_list
 *
 * Empties seam_array and then adds one seam for each pair of adjacent
 * blobs in the word. Each of the seams created by this routine has
 * location information only: x is midway between the right edge of the
 * earlier blob and the left edge of the later one, and y is the mean of
 * the four top/bottom values of the two boxes.
 */
void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
  seam_array->truncate(0);
  TPOINT location;

  for (int left = 0; left + 1 < word->NumBlobs(); ++left) {
    TBOX left_box = word->blobs[left]->bounding_box();
    TBOX right_box = word->blobs[left + 1]->bounding_box();
    location.x = (left_box.right() + right_box.left()) / 2;
    location.y = (left_box.bottom() + left_box.top() +
                  right_box.bottom() + right_box.top()) / 4;
    seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL));
  }
}
// Setup for a baseline normalization. If there are segs, then they // are used, otherwise, if there is a row, that is used, otherwise the // bottom of the word_box is used for the baseline. void DENORM::SetupBLNormalize(const BLOCK* block, const ROW* row, float x_height, const TBOX& word_box, int num_segs, const DENORM_SEG* segs) { float scale = kBlnXHeight / x_height; float x_origin = (word_box.left() + word_box.right()) / 2.0f; float y_origin = 0.0f; if (num_segs == 0 && row == NULL) { y_origin = word_box.bottom(); } SetupNormalization(block, row, NULL, NULL, segs, num_segs, x_origin, y_origin, scale, scale, 0.0f, static_cast<float>(kBlnBaselineOffset)); }
// Accounts for one snprintf call made at buffer + *used with
// capacity - *used bytes available: advances *used by the number of
// characters produced, and pins it to the last byte of the buffer if the
// output was truncated. A negative (error) return leaves *used unchanged.
static void AdvanceMessageOffset(int written, int capacity, int* used) {
  if (written < 0) return;
  *used += written;
  if (*used >= capacity) *used = capacity - 1;
}

// Builds a one-line description of what lies under the point (x, y) and
// adds it to image_win as a message. The line contains the point itself,
// the baseline value at x for each row whose bounding box contains the
// point (reported when the row differs from the previous word's row), and
// the bounding boxes of every word, and every C_BLOB within such a word,
// that contain the point.
// The message is limited to kBufsize - 1 characters; anything beyond that
// is dropped rather than written past the end of the buffer.
void show_point(PAGE_RES* page_res, float x, float y) {
  FCOORD pt(x, y);
  PAGE_RES_IT pr_it(page_res);

  const int kBufsize = 512;
  char msg[kBufsize];
  int used = 0;  // Offset of the terminating NUL within msg.
  msg[0] = '\0';

  // Every append uses snprintf with the space that is still free. The
  // previous unbounded sprintf calls could run off the end of msg when many
  // words or blobs contained the point.
  AdvanceMessageOffset(
      snprintf(msg + used, kBufsize - used, "Pt:(%0.3f, %0.3f) ", x, y),
      kBufsize, &used);

  for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
    if (pr_it.row() != pr_it.prev_row() &&
        pr_it.row()->row->bounding_box().contains(pt)) {
      AdvanceMessageOffset(
          snprintf(msg + used, kBufsize - used, "BL(x)=%0.3f ",
                   pr_it.row()->row->base_line(x)),
          kBufsize, &used);
    }
    if (word->word->bounding_box().contains(pt)) {
      TBOX box = word->word->bounding_box();
      AdvanceMessageOffset(
          snprintf(msg + used, kBufsize - used, "Wd(%d, %d)/(%d, %d) ",
                   box.left(), box.bottom(), box.right(), box.top()),
          kBufsize, &used);
      C_BLOB_IT cblob_it(word->word->cblob_list());
      for (cblob_it.mark_cycle_pt();
           !cblob_it.cycled_list();
           cblob_it.forward()) {
        C_BLOB* cblob = cblob_it.data();
        box = cblob->bounding_box();
        if (box.contains(pt)) {
          AdvanceMessageOffset(
              snprintf(msg + used, kBufsize - used,
                       "CBlb(%d, %d)/(%d, %d) ",
                       box.left(), box.bottom(), box.right(), box.top()),
              kBufsize, &used);
        }
      }
    }
  }
  // Guarantee termination even if an snprintf call reported an error.
  msg[used] = '\0';
  image_win->AddMessage(msg);
}
/** * Returns the bounding rectangle of the current object at the given level in * the coordinates of the working image that is pix_binary(). * See comment on coordinate system above. * Returns false if there is no such object at the current position. */ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int* left, int* top, int* right, int* bottom) const { if (Empty(level)) return false; TBOX box; PARA *para = NULL; switch (level) { case RIL_BLOCK: box = it_->block()->block->bounding_box(); break; case RIL_PARA: para = it_->row()->row->para(); // explicit fall-through. case RIL_TEXTLINE: box = it_->row()->row->bounding_box(); break; case RIL_WORD: box = it_->word()->word->bounding_box(); break; case RIL_SYMBOL: if (cblob_it_ == NULL) box = it_->word()->box_word->BlobBox(blob_index_); else box = cblob_it_->data()->bounding_box(); } if (level == RIL_PARA) { PageIterator other = *this; other.Begin(); do { if (other.it_->block() && other.it_->block()->block == it_->block()->block && other.it_->row() && other.it_->row()->row && other.it_->row()->row->para() == para) { box = box.bounding_union(other.it_->row()->row->bounding_box()); } } while (other.Next(RIL_TEXTLINE)); } if (level != RIL_SYMBOL || cblob_it_ != NULL) box.rotate(it_->block()->block->re_rotation()); // Now we have a box in tesseract coordinates relative to the image rectangle, // we have to convert the coords to a top-down system. const int pix_height = pixGetHeight(tesseract_->pix_binary()); const int pix_width = pixGetWidth(tesseract_->pix_binary()); *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); *top = ClipToRange(pix_height - box.top(), 0, pix_height); *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); return true; }
// Helper function to add 1 to a rectangle in source image coords to the // internal projection pix_. void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { int scaled_left = ImageXToProjectionX(box.left()); int scaled_top = ImageYToProjectionY(box.top()); int scaled_right = ImageXToProjectionX(box.right()); int scaled_bottom = ImageYToProjectionY(box.bottom()); int wpl = pixGetWpl(pix_); uint32_t* data = pixGetData(pix_) + scaled_top * wpl; for (int y = scaled_top; y <= scaled_bottom; ++y) { for (int x = scaled_left; x <= scaled_right; ++x) { int pixel = GET_DATA_BYTE(data, x); if (pixel < 255) SET_DATA_BYTE(data, x, pixel + 1); } data += wpl; } }
// Returns true if more than half the area of the rect is covered by grid // cells that are over the threshold. bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); int total_area = 0; for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { int value = GridCellValue(x, y); if (value > threshold) { TBOX cell_box(x * gridsize_, y * gridsize_, (x + 1) * gridsize_, (y + 1) * gridsize_); cell_box &= rect; // This is in-place box intersection. total_area += cell_box.area(); } } } return total_area * 2 > rect.area(); }
// Display the tab codes of the BLOBNBOXes in this grid. ScrollView* AlignedBlob::DisplayTabs(const char* window_name, ScrollView* tab_win) { #ifndef GRAPHICS_DISABLED if (tab_win == NULL) tab_win = MakeWindow(0, 50, window_name); // For every tab in the grid, display it. GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { TBOX box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); int bottom_y = box.bottom(); TabType tabtype = bbox->left_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_UNCONFIRMED) tab_win->Pen(ScrollView::BLUE); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::GREEN); else if (tabtype == TT_FAKE) tab_win->Pen(ScrollView::YELLOW); else tab_win->Pen(ScrollView::GREY); tab_win->Line(left_x, top_y, left_x, bottom_y); } tabtype = bbox->right_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_UNCONFIRMED) tab_win->Pen(ScrollView::MAGENTA); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::RED); else if (tabtype == TT_FAKE) tab_win->Pen(ScrollView::ORANGE); else tab_win->Pen(ScrollView::GREY); tab_win->Line(right_x, top_y, right_x, bottom_y); } } tab_win->Update(); #endif return tab_win; }
// Displays the segmentation state of *this (if not the same as the last // one displayed) and waits for a click in the window. void WERD_CHOICE::DisplaySegmentation(TWERD* word) { #ifndef GRAPHICS_DISABLED // Number of different colors to draw with. const int kNumColors = 6; static ScrollView *segm_window = NULL; // Check the state against the static prev_drawn_state. static GenericVector<int> prev_drawn_state; bool already_done = prev_drawn_state.size() == length_; if (!already_done) prev_drawn_state.init_to_size(length_, 0); for (int i = 0; i < length_; ++i) { if (prev_drawn_state[i] != state_[i]) { already_done = false; } prev_drawn_state[i] = state_[i]; } if (already_done || word->blobs.empty()) return; // Create the window if needed. if (segm_window == NULL) { segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true); } else { segm_window->Clear(); } TBOX bbox; int blob_index = 0; for (int c = 0; c < length_; ++c) { ScrollView::Color color = static_cast<ScrollView::Color>(c % kNumColors + 3); for (int i = 0; i < state_[c]; ++i, ++blob_index) { TBLOB* blob = word->blobs[blob_index]; bbox += blob->bounding_box(); blob->plot(segm_window, color, color); } } segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom()); segm_window->Update(); window_wait(segm_window); #endif }
/** * Returns the baseline of the current object at the given level. * The baseline is the line that passes through (x1, y1) and (x2, y2). * WARNING: with vertical text, baselines may be vertical! */ bool PageIterator::Baseline(PageIteratorLevel level, int* x1, int* y1, int* x2, int* y2) const { if (it_->word() == NULL) return false; // Already at the end! ROW* row = it_->row()->row; WERD* word = it_->word()->word; TBOX box = (level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box() : row->bounding_box(); int left = box.left(); ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5)); int right = box.right(); ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5)); // Rotate to image coordinates and convert to global image coords. startpt.rotate(it_->block()->block->re_rotation()); endpt.rotate(it_->block()->block->re_rotation()); *x1 = startpt.x() / scale_ + rect_left_; *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_; *x2 = endpt.x() / scale_ + rect_left_; *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_; return true; }
// Fixes the block so it obeys all the rules: // Must have at least one ROW. // Must have at least one WERD. // WERDs contain a fake blob. void Textord::cleanup_nontext_block(BLOCK* block) { // Non-text blocks must contain at least one row. ROW_IT row_it(block->row_list()); if (row_it.empty()) { TBOX box = block->bounding_box(); float height = box.height(); inT32 xstarts[2] = {box.left(), box.right()}; double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())}; ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f, height / 4.0f, 0, 1); row_it.add_after_then_move(row); } // Each row must contain at least one word. for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { ROW* row = row_it.data(); WERD_IT w_it(row->word_list()); if (w_it.empty()) { // Make a fake blob to put in the word. TBOX box = block->row_list()->singleton() ? block->bounding_box() : row->bounding_box(); C_BLOB* blob = C_BLOB::FakeBlob(box); C_BLOB_LIST blobs; C_BLOB_IT blob_it(&blobs); blob_it.add_after_then_move(blob); WERD* word = new WERD(&blobs, 0, NULL); w_it.add_after_then_move(word); } // Each word must contain a fake blob. for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); // Just assert that this is true, as it would be useful to find // out why it isn't. ASSERT_HOST(!word->cblob_list()->empty()); } row->recalc_bounding_box(); } }
/**********************************************************************
 * make_rotated_tess_blob
 *
 * Make a single Tess style blob, applying the given rotation and
 * renormalizing.  When there is no denorm, no block, or the block's
 * classify rotation has a zero y component, the blob is converted
 * as it is.
 **********************************************************************/
TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
                              BOOL8 flatten) {
  const bool needs_rotation =
      denorm != NULL && denorm->block() != NULL &&
      denorm->block()->classify_rotation().y() != 0.0;
  if (!needs_rotation) {
    return make_tess_blob(blob, flatten);
  }

  TBOX box = blob->bounding_box();
  // Box size divided by the denorm scale, rounded to nearest.
  const int src_width =
      static_cast<int>(box.width() / denorm->scale() + 0.5);
  const int src_height =
      static_cast<int>(box.height() / denorm->scale() + 0.5);
  const int x_middle = (box.left() + box.right()) / 2;
  const int y_middle = (box.top() + box.bottom()) / 2;

  // Rotate a copy of the blob about the centre of its box.
  PBLOB* rotated_blob = PBLOB::deep_copy(blob);
  rotated_blob->move(FCOORD(-x_middle, -y_middle));
  rotated_blob->rotate(denorm->block()->classify_rotation());

  ICOORD median_size = denorm->block()->median_size();
  const int tolerance = median_size.x() / 8;
  // TODO(dsl/rays) find a better normalization solution. In the mean time
  // make it work for CJK by normalizing for Cap height in the same way
  // as is applied in compute_block_xheight when the row is presumed to
  // be ALLCAPS, i.e. the x-height is the fixed fraction
  // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
  const bool near_median_size =
      NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
      NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance);
  if (near_median_size) {
    float target_height = bln_x_height * (textord_merge_x + textord_merge_asc)
        / textord_merge_x;
    rotated_blob->scale(target_height / box.width());
    // Shift so that the bottom of the scaled blob sits at
    // bln_baseline_offset.
    rotated_blob->move(FCOORD(0.0f,
                              bln_baseline_offset -
                              rotated_blob->bounding_box().bottom()));
  }

  TBLOB* result = make_tess_blob(rotated_blob, flatten);
  delete rotated_blob;
  return result;
}