// Scores a blob by the largest width-or-height found among its outlines.
// The score is doubled when the blob has more than 5 outlines, and then
// halved when the blob's bounding box sits high (bottom above
// 4 * kBlnBaselineOffset) or low (top below kBlnBaselineOffset / 2).
float Tesseract::blob_noise_score(TBLOB *blob) {
  inT16 num_outlines = 0;
  inT16 biggest_side = 0;

  TESSLINE* outline = blob->outlines;
  while (outline != NULL) {
    ++num_outlines;
    TBOX outline_box = outline->bounding_box();
    // The longer of the two sides of this outline's bounding box.
    inT16 longer_side = (outline_box.height() > outline_box.width())
                            ? outline_box.height()
                            : outline_box.width();
    if (biggest_side < longer_side) {
      biggest_side = longer_side;
    }
    outline = outline->next;
  }

  // Penalise blobs made of lots of outlines.
  if (num_outlines > 5) {
    biggest_side *= 2;
  }

  // Be lax on blobs that are high or low.
  TBOX blob_box = blob->bounding_box();
  if (blob_box.bottom() > kBlnBaselineOffset * 4 ||
      blob_box.top() < kBlnBaselineOffset / 2) {
    biggest_side /= 2;
  }

  return biggest_side;
}
// Sets up the built-in DENORM and normalizes the blob in-place. // For parameters see DENORM::SetupNormalization, plus the inverse flag for // this blob and the Pix for the full image. void TBLOB::Normalize(const BLOCK* block, const FCOORD* rotation, const DENORM* predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix* pix) { denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale, final_xshift, final_yshift); denorm_.set_inverse(inverse); denorm_.set_pix(pix); // TODO(rays) outline->Normalize is more accurate, but breaks tests due // the changes it makes. Reinstate this code with a retraining. // The reason this change is troublesome is that it normalizes for the // baseline value computed independently at each x-coord. If the baseline // is not horizontal, this introduces shear into the normalized blob, which // is useful on the rare occasions that the baseline is really curved, but // the baselines need to be stabilized the rest of the time. #if 0 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { outline->Normalize(denorm_); } #else denorm_.LocalNormBlob(this); #endif }
/**********************************************************************
 * TBLOB::bounding_box()
 *
 * Returns the bounding box of a compound blob: the union of the bounding
 * boxes of every top-level outline in the blob. A blob with no outlines
 * yields TBOX(0, 0, 0, 0).
 **********************************************************************/
TBOX TBLOB::bounding_box() const {
  if (outlines == nullptr) {
    return TBOX(0, 0, 0, 0);
  }
  // Seed with the first outline, then grow by each remaining one.
  TBOX result = outlines->bounding_box();
  TESSLINE* remaining = outlines->next;
  while (remaining != nullptr) {
    result += remaining->bounding_box();
    remaining = remaining->next;
  }
  return result;
}
// Normalize in-place using the DENORM. void TBLOB::Normalize(const DENORM& denorm) { // TODO(rays) outline->Normalize is more accurate, but breaks tests due // the changes it makes. Reinstate this code with a retraining. #if 1 for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { outline->Normalize(denorm); } #else denorm.LocalNormBlob(this); #endif }
/**
 * @name find_split_in_blob
 *
 * Checks both end points of the split against the outlines of the blob.
 *
 * @returns TRUE if some outline of the blob contains split->point1 and some
 * outline (not necessarily the same one) contains split->point2.
 */
bool find_split_in_blob(SPLIT *split, TBLOB *blob) {
  // Scan for an outline holding the first end point.
  TESSLINE *candidate = blob->outlines;
  while (candidate != NULL && !candidate->Contains(split->point1->pos)) {
    candidate = candidate->next;
  }
  if (candidate == NULL) {
    return FALSE;
  }

  // Restart from the head and look for the second end point.
  candidate = blob->outlines;
  while (candidate != NULL) {
    if (candidate->Contains(split->point2->pos)) {
      return TRUE;
    }
    candidate = candidate->next;
  }
  return FALSE;
}
// Finds and deletes any duplicate outlines in this blob, without deleting // their EDGEPTs. void TBLOB::EliminateDuplicateOutlines() { for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) { TESSLINE* last_outline = outline; for (TESSLINE* other_outline = outline->next; other_outline != nullptr; last_outline = other_outline, other_outline = other_outline->next) { if (outline->SameBox(*other_outline)) { last_outline->next = other_outline->next; // This doesn't leak - the outlines share the EDGEPTs. other_outline->loop = nullptr; delete other_outline; other_outline = last_outline; // If it is part of a cut, then it can't be a hole any more. outline->is_hole = false; } } } }
// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as // (x,y) position and angle as measured counterclockwise from the vector // <-1, 0>, from blob using two normalizations defined by bl_denorm and // cn_denorm. See SetpuBLCNDenorms for definitions. // If outline_cn_counts is not nullptr, on return it contains the cumulative // number of cn features generated for each outline in the blob (in order). // Thus after the first outline, there were (*outline_cn_counts)[0] features, // after the second outline, there were (*outline_cn_counts)[1] features etc. void Classify::ExtractFeatures(const TBLOB& blob, bool nonlinear_norm, GenericVector<INT_FEATURE_STRUCT>* bl_features, GenericVector<INT_FEATURE_STRUCT>* cn_features, INT_FX_RESULT_STRUCT* results, GenericVector<int>* outline_cn_counts) { DENORM bl_denorm, cn_denorm; tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, &bl_denorm, &cn_denorm, results); if (outline_cn_counts != nullptr) outline_cn_counts->truncate(0); // Iterate the outlines. for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) { // Iterate the polygon. EDGEPT* loop_pt = ol->FindBestStartPt(); EDGEPT* pt = loop_pt; if (pt == nullptr) continue; do { if (pt->IsHidden()) continue; // Find a run of equal src_outline. EDGEPT* last_pt = pt; do { last_pt = last_pt->next; } while (last_pt != loop_pt && !last_pt->IsHidden() && last_pt->src_outline == pt->src_outline); last_pt = last_pt->prev; // Until the adaptive classifier can be weaned off polygon segments, // we have to force extraction from the polygon for the bl_features. 
ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength, true, bl_features); ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength, false, cn_features); pt = last_pt; } while ((pt = pt->next) != loop_pt); if (outline_cn_counts != nullptr) outline_cn_counts->push_back(cn_features->size()); } results->NumBL = bl_features->size(); results->NumCN = cn_features->size(); results->YBottom = blob.bounding_box().bottom(); results->YTop = blob.bounding_box().top(); results->Width = blob.bounding_box().width(); }
// Consume the circular list of EDGEPTs to make a TESSLINE. TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { TESSLINE* result = new TESSLINE; result->loop = outline; if (outline->src_outline != NULL) { // ASSUMPTION: This function is only ever called from ApproximateOutline // and therefore either all points have a src_outline or all do not. // Just as SetupFromPos sets the vectors from the vertices, setup the // step_count members to indicate the (positive) number of original // C_OUTLINE steps to the next vertex. EDGEPT* pt = outline; do { pt->step_count = pt->next->start_step - pt->start_step; if (pt->step_count < 0) pt->step_count += pt->src_outline->pathlength(); pt = pt->next; } while (pt != outline); } result->SetupFromPos(); return result; }
// Consume the circular list of EDGEPTs to make a TESSLINE. TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { TESSLINE* result = new TESSLINE; result->loop = outline; result->SetupFromPos(); return result; }
// Plots every outline of the blob by forwarding window, color and
// child_color to each outline's plot().
void TBLOB::plot(ScrollView* window, ScrollView::Color color,
                 ScrollView::Color child_color) {
  TESSLINE* current = outlines;
  while (current != NULL) {
    current->plot(window, color, child_color);
    current = current->next;
  }
}
// Recomputes the bounding boxes of the outlines. void TBLOB::ComputeBoundingBoxes() { for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { outline->ComputeBoundingBox(); } }
// Scales by the given factor in place. void TBLOB::Scale(float factor) { for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { outline->Scale(factor); } }
// Moves by the given vec in place. void TBLOB::Move(const ICOORD vec) { for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { outline->Move(vec); } }
// Rotates by the given rotation in place. void TBLOB::Rotate(const FCOORD rotation) { for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { outline->Rotate(rotation); } }