// Internal version of EvaluateBox returns the unclipped gradients as well // as the result of EvaluateBox. // hgrad1 and hgrad2 are the gradients for the horizontal textline. int TextlineProjection::EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, int* hgrad1, int* hgrad2, int* vgrad1, int* vgrad2) const { int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true); int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false); int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true); int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), box.bottom(), box.top(), false); int top_clipped = MAX(top_gradient, 0); int bottom_clipped = MAX(bottom_gradient, 0); int left_clipped = MAX(left_gradient, 0); int right_clipped = MAX(right_gradient, 0); if (debug) { tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", top_gradient, bottom_gradient, left_gradient, right_gradient); box.print(); } int result = MAX(top_clipped, bottom_clipped) - MAX(left_clipped, right_clipped); if (hgrad1 != NULL && hgrad2 != NULL) { *hgrad1 = top_gradient; *hgrad2 = bottom_gradient; } if (vgrad1 != NULL && vgrad2 != NULL) { *vgrad1 = left_gradient; *vgrad2 = right_gradient; } return result; }
// Compute the distance from the from_box to the to_box using curved // projection space. Separation that involves a decrease in projection // density (moving from the from_box to the to_box) is weighted more heavily // than constant density, and an increase is weighted less. // If horizontal_textline is true, then curved space is used vertically, // as for a diacritic on the edge of a textline. // The projection uses original image coords, so denorm is used to get // back to the image coords from box/part space. // How the calculation works: Think of a diacritic near a textline. // Distance is measured from the far side of the from_box to the near side of // the to_box. Shown is the horizontal textline case. // |------^-----| // | from | box | // |------|-----| // perpendicular | // <------v-------->|--------------------| // parallel | to box | // |--------------------| // Perpendicular distance uses "curved space" See VerticalDistance below. // Parallel distance is linear. // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio. int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, bool horizontal_textline, const DENORM* denorm, bool debug) const { // The parallel_gap is the horizontal gap between a horizontal textline and // the box. Analogous for vertical. int parallel_gap = 0; // start_pt is the box end of the line to be modified for curved space. TPOINT start_pt; // end_pt is the partition end of the line to be modified for curved space. TPOINT end_pt; if (horizontal_textline) { parallel_gap = from_box.x_gap(to_box) + from_box.width(); start_pt.x = (from_box.left() + from_box.right()) / 2; end_pt.x = start_pt.x; if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) { start_pt.y = from_box.top(); end_pt.y = MIN(to_box.top(), start_pt.y); } else { start_pt.y = from_box.bottom(); end_pt.y = MAX(to_box.bottom(), start_pt.y); } } else { parallel_gap = from_box.y_gap(to_box) + from_box.height(); if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) { start_pt.x = from_box.right(); end_pt.x = MIN(to_box.right(), start_pt.x); } else { start_pt.x = from_box.left(); end_pt.x = MAX(to_box.left(), start_pt.x); } start_pt.y = (from_box.bottom() + from_box.top()) / 2; end_pt.y = start_pt.y; } // The perpendicular gap is the max vertical distance gap out of: // top of from_box to to_box top and bottom of from_box to to_box bottom. // This value is then modified for curved projection space. // Analogous for vertical. int perpendicular_gap = 0; // If start_pt == end_pt, then the from_box lies entirely within the to_box // (in the perpendicular direction), so we don't need to calculate the // perpendicular_gap. if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { if (denorm != NULL) { // Denormalize the start and end. denorm->DenormTransform(NULL, start_pt, &start_pt); denorm->DenormTransform(NULL, end_pt, &end_pt); } if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y); } else { perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y); } } // The parallel_gap weighs less than the perpendicular_gap. return perpendicular_gap + parallel_gap / kParaPerpDistRatio; }
QString printTBOX(TBOX box,int height, bool eol) { if (eol) return QString ("Bounding box=(%1,%2)->(%3,%4)\n").arg(box.left()) .arg(height - box.top()).arg(box.right()).arg(height - box.bottom()); else return QString ("Bounding box=(%1,%2)->(%3,%4)").arg(box.left()) .arg(height - box.top()).arg(box.right()).arg(height - box.bottom()); }
// Tests each blob in the list to see if it is certain non-text using 2 // conditions: // 1. blob overlaps a cell with high value in noise_density_ (previously set // by ComputeNoiseDensity). // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This // condition is disabled with max_blob_overlaps == -1. // If it does, the blob is declared non-text, and is used to mark up the // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their // neighbours reset, as they may now point to deleted data. // WARNING: The blobs list blobs may be in the *this grid, but they are // not removed. If any deleted blobs might be in *this, then this must be // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not NULL, deleted blobs are drawn on it in red, and kept // blobs are drawn on it in ok_color. void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, int max_blob_overlaps, ScrollView* win, ScrollView::Color ok_color, Pix* nontext_mask) { int imageheight = tright().y() - bleft().x(); BLOBNBOX_IT blob_it(blobs); BLOBNBOX_LIST dead_blobs; BLOBNBOX_IT dead_it(&dead_blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX box = blob->bounding_box(); if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && (max_blob_overlaps < 0 || !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { blob->ClearNeighbours(); #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ok_color, ok_color); #endif // GRAPHICS_DISABLED } else { if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. Pix* blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, 0); pixDestroy(&blob_pix); } else { if (box.area() < gridsize() * gridsize()) { // It is a really bad idea to make lots of small components in the // photo mask, so try to join it to a bigger area by expanding the // box in a way that does not touch any zero noise density cell. box = AttemptBoxExpansion(box, *noise_density_, gridsize()); } // All overlapped cells are non-zero, so just mark the rectangle. pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SET, NULL, 0, 0); } #ifndef GRAPHICS_DISABLED if (win != NULL) blob->plot(win, ScrollView::RED, ScrollView::RED); #endif // GRAPHICS_DISABLED // It is safe to delete the cblob now, as it isn't used by the grid // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the // dead_blobs list. // TODO(rays) delete the delete when the BLOBNBOX destructor deletes // the cblob. delete blob->cblob(); dead_it.add_to_end(blob_it.extract()); } } }
// Initialize from box coordinates. POLY_BLOCK::POLY_BLOCK(const TBOX& box, PolyBlockType t) { vertices.clear(); ICOORDELT_IT v = &vertices; v.move_to_first(); v.add_to_end(new ICOORDELT(box.left(), box.top())); v.add_to_end(new ICOORDELT(box.left(), box.bottom())); v.add_to_end(new ICOORDELT(box.right(), box.bottom())); v.add_to_end(new ICOORDELT(box.right(), box.top())); compute_bb(); type = t; }
/** * @name start_seam_list * * Initialize a list of seams that match the original number of blobs * present in the starting segmentation. Each of the seams created * by this routine have location information only. */ void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) { seam_array->truncate(0); TPOINT location; for (int b = 1; b < word->NumBlobs(); ++b) { TBOX bbox = word->blobs[b - 1]->bounding_box(); TBOX nbox = word->blobs[b]->bounding_box(); location.x = (bbox.right() + nbox.left()) / 2; location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4; seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL)); } }
bool PIXROW::bad_box( //return true if box exceeds image int xsize, int ysize) const { TBOX bbox = bounding_box (); if (bbox.left () < 0 || bbox.right () > xsize || bbox.top () > ysize || bbox.bottom () < 0) { tprintf("Box (%d,%d)->(%d,%d) bad compared to %d,%d\n", bbox.left(),bbox.bottom(), bbox.right(), bbox.top(), xsize, ysize); return true; } return false; }
/********************************************************************** * render_segmentation * * Create a list of line segments that represent the list of chunks * using the correct segmentation that was supplied as input. **********************************************************************/ void render_segmentation(ScrollView *window, TBLOB *chunks, SEARCH_STATE segmentation) { TBLOB *blob; C_COL color = Black; int char_num = -1; int chunks_left = 0; TBOX bbox; if (chunks) bbox = chunks->bounding_box(); for (blob = chunks; blob != NULL; blob = blob->next) { bbox += blob->bounding_box(); if (chunks_left-- == 0) { color = color_list[++char_num % NUM_COLORS]; if (char_num < segmentation[0]) chunks_left = segmentation[char_num + 1]; else chunks_left = MAX_INT32; } render_outline(window, blob->outlines, color); } window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom()); }
float blob_noise_score(PBLOB *blob) { OUTLINE_IT outline_it; TBOX box; //BB of outline inT16 outline_count = 0; inT16 max_dimension; inT16 largest_outline_dimension = 0; outline_it.set_to_list (blob->out_list ()); for (outline_it.mark_cycle_pt (); !outline_it.cycled_list (); outline_it.forward ()) { outline_count++; box = outline_it.data ()->bounding_box (); if (box.height () > box.width ()) max_dimension = box.height (); else max_dimension = box.width (); if (largest_outline_dimension < max_dimension) largest_outline_dimension = max_dimension; } if (fixsp_noise_score_fixing) { if (outline_count > 5) //penalise LOTS of blobs largest_outline_dimension *= 2; box = blob->bounding_box (); if ((box.bottom () > bln_baseline_offset * 4) || (box.top () < bln_baseline_offset / 2)) //Lax blob is if high or low largest_outline_dimension /= 2; } return largest_outline_dimension; }
static void PrintBoxWidths(BLOBNBOX* neighbour) { TBOX nbox = neighbour->bounding_box(); tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), 2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter()); }
// Inserts a list of blobs into the projection. // Rotation is a multiple of 90 degrees to get from blob coords to // nontext_map coords, nontext_map_box is the bounds of the nontext_map. // Blobs are spread horizontally or vertically according to their internal // flags, but the spreading is truncated by set pixels in the nontext_map // and also by the horizontal rule line limits on the blobs. void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation, const TBOX& nontext_map_box, Pix* nontext_map) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); TBOX bbox = blob->bounding_box(); ICOORD middle((bbox.left() + bbox.right()) / 2, (bbox.bottom() + bbox.top()) / 2); bool spreading_horizontally = PadBlobBox(blob, &bbox); // Rotate to match the nontext_map. bbox.rotate(rotation); middle.rotate(rotation); if (rotation.x() == 0.0f) spreading_horizontally = !spreading_horizontally; // Clip to the image before applying the increments. bbox &= nontext_map_box; // This is in-place box intersection. // Check for image pixels before spreading. TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, nontext_map, &bbox); if (bbox.area() > 0) { IncrementRectangle8Bit(bbox); } } }
void OL_BUCKETS::extract_children( // recursive count C_OUTLINE *outline, // parent outline C_OUTLINE_IT *it // destination iterator ) { inT16 xmin, xmax; // coord limits inT16 ymin, ymax; inT16 xindex, yindex; // current bucket TBOX olbox; C_OUTLINE_IT child_it; // search iterator olbox = outline->bounding_box(); xmin =(olbox.left() - bl.x()) / BUCKETSIZE; xmax =(olbox.right() - bl.x()) / BUCKETSIZE; ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; ymax =(olbox.top() - bl.y()) / BUCKETSIZE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list(&buckets[yindex * bxdim + xindex]); for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) { if (*child_it.data() < *outline) { it->add_after_then_move(child_it.extract()); } } } } }
float Tesseract::blob_noise_score(TBLOB *blob) { TBOX box; // BB of outline inT16 outline_count = 0; inT16 max_dimension; inT16 largest_outline_dimension = 0; for (TESSLINE* ol = blob->outlines; ol != NULL; ol= ol->next) { outline_count++; box = ol->bounding_box(); if (box.height() > box.width()) { max_dimension = box.height(); } else { max_dimension = box.width(); } if (largest_outline_dimension < max_dimension) largest_outline_dimension = max_dimension; } if (outline_count > 5) { // penalise LOTS of blobs largest_outline_dimension *= 2; } box = blob->bounding_box(); if (box.bottom() > kBlnBaselineOffset * 4 || box.top() < kBlnBaselineOffset / 2) { // Lax blob is if high or low largest_outline_dimension /= 2; } return largest_outline_dimension; }
// Generates a TrainingSample from a TBLOB. Extracts features and sets // the bounding box, so classifiers that operate on the image can work. // TODO(rays) Make BlobToTrainingSample a member of Classify now that // the FlexFx and FeatureDescription code have been removed and LearnBlob // is now a member of Classify. TrainingSample* BlobToTrainingSample( const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, GenericVector<INT_FEATURE_STRUCT>* bl_features) { GenericVector<INT_FEATURE_STRUCT> cn_features; Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, &cn_features, fx_info, nullptr); // TODO(rays) Use blob->PreciseBoundingBox() instead. TBOX box = blob.bounding_box(); TrainingSample* sample = nullptr; int num_features = fx_info->NumCN; if (num_features > 0) { sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], num_features); } if (sample != nullptr) { // Set the bounding box (in original image coordinates) in the sample. TPOINT topleft, botright; topleft.x = box.left(); topleft.y = box.top(); botright.x = box.right(); botright.y = box.bottom(); TPOINT original_topleft, original_botright; blob.denorm().DenormTransform(nullptr, topleft, &original_topleft); blob.denorm().DenormTransform(nullptr, botright, &original_botright); sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y)); } return sample; }
// This function takes tif/box pair of files and runs recognition on the image, // while making sure that the word bounds that tesseract identified roughly // match to those specified by the input box file. For each word (ngram in a // single bounding box from the input box file) it outputs the ocred result, // the correct label, rating and certainty. void Tesseract::recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) { STRING box_fname = fname; const char *lastdot = strrchr(box_fname.string(), '.'); if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0'; box_fname += ".box"; // read_next_box() will close box_file FILE *box_file = open_file(box_fname.string(), "r"); PAGE_RES_IT page_res_it; page_res_it.page_res = page_res; page_res_it.restart_page(); char label[kBoxReadBufSize]; // Process all the words on this page. TBOX tbox; // tesseract-identified box TBOX bbox; // box from the box file bool keep_going; int line_number = 0; do { keep_going = read_t(&page_res_it, &tbox); keep_going &= read_b(applybox_page, &line_number, box_file, label, &bbox); // Align bottom left points of the TBOXes. while (keep_going && !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { keep_going = (bbox.bottom() < tbox.bottom()) ? read_t(&page_res_it, &tbox) : read_b(applybox_page, &line_number, box_file, label, &bbox); } while (keep_going && !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) { keep_going = (bbox.left() > tbox.left()) ? read_t(&page_res_it, &tbox) : read_b(applybox_page, &line_number, box_file, label, &bbox); } // OCR the word if top right points of the TBOXes are similar. if (keep_going && NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { ambigs_classify_and_output(page_res_it.prev_word(), page_res_it.prev_row(), page_res_it.prev_block(), label, output_file); } } while (keep_going); }
// Extract the OCR results, costs (penalty points for uncertainty), // and the bounding boxes of the characters. static void extract_result(ELIST_ITERATOR *out, PAGE_RES* page_res) { PAGE_RES_IT page_res_it(page_res); int word_count = 0; while (page_res_it.word() != NULL) { WERD_RES *word = page_res_it.word(); const char *str = word->best_choice->string().string(); const char *len = word->best_choice->lengths().string(); if (word_count) add_space(out); TBOX bln_rect; PBLOB_LIST *blobs = word->outword->blob_list(); PBLOB_IT it(blobs); int n = strlen(len); TBOX** boxes_to_fix = new TBOX*[n]; for (int i = 0; i < n; i++) { PBLOB *blob = it.data(); TBOX current = pblob_get_bbox(blob); bln_rect.bounding_union(current); TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), str, *len); tc->box = current; boxes_to_fix[i] = &tc->box; out->add_after_then_move(tc); it.forward(); str += *len; len++; } // Find the word bbox before normalization. // Here we can't use the C_BLOB bboxes directly, // since connected letters are not yet cut. TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); // Denormalize boxes by transforming the bbox of the whole bln word // into the denorm bbox (`real_rect') of the whole word. double x_stretch = double(real_rect.width()) / bln_rect.width(); double y_stretch = double(real_rect.height()) / bln_rect.height(); for (int j = 0; j < n; j++) { TBOX *box = boxes_to_fix[j]; int x0 = int(real_rect.left() + x_stretch * (box->left() - bln_rect.left()) + 0.5); int x1 = int(real_rect.left() + x_stretch * (box->right() - bln_rect.left()) + 0.5); int y0 = int(real_rect.bottom() + y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); int y1 = int(real_rect.bottom() + y_stretch * (box->top() - bln_rect.bottom()) + 0.5); *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); } delete [] boxes_to_fix; page_res_it.forward(); word_count++; } }
// Creates a box file string from a unichar string, TBOX and page number. void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, STRING* box_str) { *box_str = unichar_str; box_str->add_str_int(" ", box.left()); box_str->add_str_int(" ", box.bottom()); box_str->add_str_int(" ", box.right()); box_str->add_str_int(" ", box.top()); box_str->add_str_int(" ", page_num); }
// Returns true if any cell value in the given rectangle is zero. bool IntGrid::AnyZeroInRect(const TBOX& rect) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { if (GridCellValue(x, y) == 0) return true; } } return false; }
// Extend this vector to include the supplied blob if it doesn't // already have it. void TabVector::ExtendToBox(BLOBNBOX* new_blob) { TBOX new_box = new_blob->bounding_box(); BLOBNBOX_C_IT it(&boxes_); if (!it.empty()) { BLOBNBOX* blob = it.data(); TBOX box = blob->bounding_box(); while (!it.at_last() && box.top() <= new_box.top()) { if (blob == new_blob) return; // We have it already. it.forward(); blob = it.data(); box = blob->bounding_box(); } if (box.top() >= new_box.top()) { it.add_before_stay_put(new_blob); needs_refit_ = true; return; } } needs_refit_ = true; it.add_after_stay_put(new_blob); }
void show_point(PAGE_RES* page_res, float x, float y) { FCOORD pt(x, y); PAGE_RES_IT pr_it(page_res); const int kBufsize = 512; char msg[kBufsize]; char *msg_ptr = msg; msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y); for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) { if (pr_it.row() != pr_it.prev_row() && pr_it.row()->row->bounding_box().contains(pt)) { msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", pr_it.row()->row->base_line(x)); } if (word->word->bounding_box().contains(pt)) { TBOX box = word->word->bounding_box(); msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", box.left(), box.bottom(), box.right(), box.top()); C_BLOB_IT cblob_it(word->word->cblob_list()); for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) { C_BLOB* cblob = cblob_it.data(); box = cblob->bounding_box(); if (box.contains(pt)) { msg_ptr += sprintf(msg_ptr, "CBlb(%d, %d)/(%d, %d) ", box.left(), box.bottom(), box.right(), box.top()); } } } } image_win->AddMessage(msg); }
/** * Returns the bounding rectangle of the current object at the given level in * the coordinates of the working image that is pix_binary(). * See comment on coordinate system above. * Returns false if there is no such object at the current position. */ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int* left, int* top, int* right, int* bottom) const { if (Empty(level)) return false; TBOX box; PARA *para = NULL; switch (level) { case RIL_BLOCK: box = it_->block()->block->bounding_box(); break; case RIL_PARA: para = it_->row()->row->para(); // explicit fall-through. case RIL_TEXTLINE: box = it_->row()->row->bounding_box(); break; case RIL_WORD: box = it_->word()->word->bounding_box(); break; case RIL_SYMBOL: if (cblob_it_ == NULL) box = it_->word()->box_word->BlobBox(blob_index_); else box = cblob_it_->data()->bounding_box(); } if (level == RIL_PARA) { PageIterator other = *this; other.Begin(); do { if (other.it_->block() && other.it_->block()->block == it_->block()->block && other.it_->row() && other.it_->row()->row && other.it_->row()->row->para() == para) { box = box.bounding_union(other.it_->row()->row->bounding_box()); } } while (other.Next(RIL_TEXTLINE)); } if (level != RIL_SYMBOL || cblob_it_ != NULL) box.rotate(it_->block()->block->re_rotation()); // Now we have a box in tesseract coordinates relative to the image rectangle, // we have to convert the coords to a top-down system. const int pix_height = pixGetHeight(tesseract_->pix_binary()); const int pix_width = pixGetWidth(tesseract_->pix_binary()); *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width); *top = ClipToRange(pix_height - box.top(), 0, pix_height); *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width); *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height); return true; }
// Returns the bounding box including the desired combination of upper and // lower noise/diacritic elements. TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const { TBOX box = true_bounding_box(); int bottom = box.bottom(); int top = box.top(); // This is a read-only iteration of the rejected blobs. C_BLOB_IT it(const_cast<C_BLOB_LIST*>(&rej_cblobs)); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TBOX dot_box = it.data()->bounding_box(); if ((upper_dots || dot_box.bottom() <= top) && (lower_dots || dot_box.top() >= bottom)) { box += dot_box; } } return box; }
// Helper function to add 1 to a rectangle in source image coords to the // internal projection pix_. void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { int scaled_left = ImageXToProjectionX(box.left()); int scaled_top = ImageYToProjectionY(box.top()); int scaled_right = ImageXToProjectionX(box.right()); int scaled_bottom = ImageYToProjectionY(box.bottom()); int wpl = pixGetWpl(pix_); uint32_t* data = pixGetData(pix_) + scaled_top * wpl; for (int y = scaled_top; y <= scaled_bottom; ++y) { for (int x = scaled_left; x <= scaled_right; ++x) { int pixel = GET_DATA_BYTE(data, x); if (pixel < 255) SET_DATA_BYTE(data, x, pixel + 1); } data += wpl; } }
// Returns true if more than half the area of the rect is covered by grid // cells that are over the threshold. bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const { int min_x, min_y, max_x, max_y; GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); GridCoords(rect.right(), rect.top(), &max_x, &max_y); int total_area = 0; for (int y = min_y; y <= max_y; ++y) { for (int x = min_x; x <= max_x; ++x) { int value = GridCellValue(x, y); if (value > threshold) { TBOX cell_box(x * gridsize_, y * gridsize_, (x + 1) * gridsize_, (y + 1) * gridsize_); cell_box &= rect; // This is in-place box intersection. total_area += cell_box.area(); } } } return total_area * 2 > rect.area(); }
// Display the tab codes of the BLOBNBOXes in this grid. ScrollView* AlignedBlob::DisplayTabs(const char* window_name, ScrollView* tab_win) { #ifndef GRAPHICS_DISABLED if (tab_win == NULL) tab_win = MakeWindow(0, 50, window_name); // For every tab in the grid, display it. GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { TBOX box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); int bottom_y = box.bottom(); TabType tabtype = bbox->left_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_UNCONFIRMED) tab_win->Pen(ScrollView::BLUE); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::GREEN); else if (tabtype == TT_FAKE) tab_win->Pen(ScrollView::YELLOW); else tab_win->Pen(ScrollView::GREY); tab_win->Line(left_x, top_y, left_x, bottom_y); } tabtype = bbox->right_tab_type(); if (tabtype != TT_NONE) { if (tabtype == TT_UNCONFIRMED) tab_win->Pen(ScrollView::MAGENTA); else if (tabtype == TT_CONFIRMED) tab_win->Pen(ScrollView::RED); else if (tabtype == TT_FAKE) tab_win->Pen(ScrollView::ORANGE); else tab_win->Pen(ScrollView::GREY); tab_win->Line(right_x, top_y, right_x, bottom_y); } } tab_win->Update(); #endif return tab_win; }
/** * Given a recognized blob, see if a contiguous collection of sub-pieces * (chopped blobs) starting at its left might qualify as being a subscript * or superscript letter based only on y position. Also do this for the * right side. */ void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers) { ScriptPos sp_unused1, sp_unused2; int unused1, unused2; if (!leading_pos) leading_pos = &sp_unused1; if (!num_leading_outliers) num_leading_outliers = &unused1; if (!trailing_pos) trailing_pos = &sp_unused2; if (!num_trailing_outliers) num_trailing_outliers = &unused2; *num_leading_outliers = *num_trailing_outliers = 0; *leading_pos = *trailing_pos = SP_NORMAL; int chopped_start = LeadingUnicharsToChopped(word, rebuilt_blob_index); int num_chopped_pieces = word->best_state[rebuilt_blob_index]; ScriptPos last_pos = SP_NORMAL; int trailing_outliers = 0; for (int i = 0; i < num_chopped_pieces; i++) { TBOX box = word->chopped_word->blobs[chopped_start + i]->bounding_box(); ScriptPos pos = SP_NORMAL; if (box.bottom() >= super_y_bottom) { pos = SP_SUPERSCRIPT; } else if (box.top() <= sub_y_top) { pos = SP_SUBSCRIPT; } if (pos == SP_NORMAL) { if (trailing_outliers == i) { *num_leading_outliers = trailing_outliers; *leading_pos = last_pos; } trailing_outliers = 0; } else { if (pos == last_pos) { trailing_outliers++; } else { trailing_outliers = 1; } } last_pos = pos; } *num_trailing_outliers = trailing_outliers; *trailing_pos = last_pos; }
// Given an input pix, and a box, the sides of the box are shrunk inwards until // they bound any black pixels found within the original box. // The function converts between tesseract coords and the pix coords assuming // that this pix is full resolution equal in size to the original image. // Returns an empty box if there are no black pixels in the source box. static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) { int im_height = pixGetHeight(pix); Box* input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height()); Box* output_box = NULL; pixClipBoxToForeground(pix, input_box, NULL, &output_box); TBOX result_box; if (output_box != NULL) { l_int32 x, y, width, height; boxGetGeometry(output_box, &x, &y, &width, &height); result_box.set_left(x); result_box.set_right(x + width); result_box.set_top(im_height - y); result_box.set_bottom(result_box.top() - height); boxDestroy(&output_box); } boxDestroy(&input_box); return result_box; }
// Displays the segmentation state of *this (if not the same as the last // one displayed) and waits for a click in the window. void WERD_CHOICE::DisplaySegmentation(TWERD* word) { #ifndef GRAPHICS_DISABLED // Number of different colors to draw with. const int kNumColors = 6; static ScrollView *segm_window = NULL; // Check the state against the static prev_drawn_state. static GenericVector<int> prev_drawn_state; bool already_done = prev_drawn_state.size() == length_; if (!already_done) prev_drawn_state.init_to_size(length_, 0); for (int i = 0; i < length_; ++i) { if (prev_drawn_state[i] != state_[i]) { already_done = false; } prev_drawn_state[i] = state_[i]; } if (already_done || word->blobs.empty()) return; // Create the window if needed. if (segm_window == NULL) { segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true); } else { segm_window->Clear(); } TBOX bbox; int blob_index = 0; for (int c = 0; c < length_; ++c) { ScrollView::Color color = static_cast<ScrollView::Color>(c % kNumColors + 3); for (int i = 0; i < state_[c]; ++i, ++blob_index) { TBLOB* blob = word->blobs[blob_index]; bbox += blob->bounding_box(); blob->plot(segm_window, color, color); } } segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom()); segm_window->Update(); window_wait(segm_window); #endif }
// Fills in the x-height range accepted by the given unichar_id, given its // bounding box in the usual baseline-normalized coordinates, with some // initial crude x-height estimate (such as word size) and this denoting the // transformation that was used. Returns false, and an empty range if the // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom // fits, but the top is impossible. bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset, const TBOX& bbox, inT16* min_xht, inT16* max_xht) const { // Clip the top and bottom to the limit of normalized feature space. int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1); int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1); // A tolerance of yscale corresponds to 1 pixel in the image. double tolerance = y_scale(); int min_bottom, max_bottom, min_top, max_top; unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top); // Default returns indicate a mis-fit. *min_xht = 0; *max_xht = 0; // Chars with a misfitting bottom might be sub/superscript/dropcap, or might // just be wrongly classified. Return an empty range so they have to be // good to be considered. if (bottom < min_bottom - tolerance || bottom > max_bottom + tolerance) { return false; } // To help very high cap/xheight ratio fonts accept the correct x-height, // and to allow the large caps in small caps to accept the xheight of the // small caps, add kBlnBaselineOffset to chars with a maximum max. if (max_top == kBlnCellHeight - 1) max_top += kBlnBaselineOffset; int height = top - kBlnBaselineOffset; double min_height = min_top - kBlnBaselineOffset - tolerance; double max_height = max_top - kBlnBaselineOffset + tolerance; if (min_height <= 0.0) { if (height <= 0 || max_height > 0) *max_xht = MAX_INT16; // Anything will do. } else if (height > 0) { int result = IntCastRounded(height * kBlnXHeight / y_scale() / min_height); *max_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16)); } if (max_height > 0.0 && height > 0) { int result = IntCastRounded(height * kBlnXHeight / y_scale() / max_height); *min_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16)); } return true; }
// Normalizes the blob for classification only if needed. // (Normally this means a non-zero classify rotation.) // If no Normalization is needed, then NULL is returned, and the input blob // can be used directly. Otherwise a new TBLOB is returned which must be // deleted after use. TBLOB* TBLOB::ClassifyNormalizeIfNeeded() const { TBLOB* rotated_blob = NULL; // If necessary, copy the blob and rotate it. The rotation is always // +/- 90 degrees, as 180 was already taken care of. if (denorm_.block() != NULL && denorm_.block()->classify_rotation().y() != 0.0) { TBOX box = bounding_box(); int x_middle = (box.left() + box.right()) / 2; int y_middle = (box.top() + box.bottom()) / 2; rotated_blob = new TBLOB(*this); const FCOORD& rotation = denorm_.block()->classify_rotation(); // Move the rotated blob back to the same y-position so that we // can still distinguish similar glyphs with differeny y-position. float target_y = kBlnBaselineOffset + (rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle); rotated_blob->Normalize(NULL, &rotation, &denorm_, x_middle, y_middle, 1.0f, 1.0f, 0.0f, target_y, denorm_.inverse(), denorm_.pix()); } return rotated_blob; }