// Draws the outline in the given colour, normalized using the given denorm, // making use of sub-pixel accurate information if available. void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, ScrollView* window) const { window->Pen(colour); if (stepcount == 0) { window->Rectangle(box.left(), box.top(), box.right(), box.bottom()); return; } const DENORM* root_denorm = denorm.RootDenorm(); ICOORD pos = start; // current position FCOORD f_pos = sub_pixel_pos_at_index(pos, 0); FCOORD pos_normed; denorm.NormTransform(root_denorm, f_pos, &pos_normed); window->SetCursor(IntCastRounded(pos_normed.x()), IntCastRounded(pos_normed.y())); for (int s = 0; s < stepcount; pos += step(s++)) { int edge_weight = edge_strength_at_index(s); if (edge_weight == 0) { // This point has conflicting gradient and step direction, so ignore it. continue; } FCOORD f_pos = sub_pixel_pos_at_index(pos, s); FCOORD pos_normed; denorm.NormTransform(root_denorm, f_pos, &pos_normed); window->DrawTo(IntCastRounded(pos_normed.x()), IntCastRounded(pos_normed.y())); } }
// Extracts Tesseract features and appends them to the features vector. // Startpt to lastpt, inclusive, MUST have the same src_outline member, // which may be nullptr. The vector from lastpt to its next is included in // the feature extraction. Hidden edges should be excluded by the caller. // If force_poly is true, the features will be extracted from the polygonal // approximation even if more accurate data is available. static void ExtractFeaturesFromRun( const EDGEPT* startpt, const EDGEPT* lastpt, const DENORM& denorm, double feature_length, bool force_poly, GenericVector<INT_FEATURE_STRUCT>* features) { const EDGEPT* endpt = lastpt->next; const C_OUTLINE* outline = startpt->src_outline; if (outline != nullptr && !force_poly) { // Detailed information is available. We have to normalize only from // the root_denorm to denorm. const DENORM* root_denorm = denorm.RootDenorm(); int total_features = 0; // Get the features from the outline. int step_length = outline->pathlength(); int start_index = startpt->start_step; // pos is the integer coordinates of the binary image steps. ICOORD pos = outline->position_at_index(start_index); // We use an end_index that allows us to use a positive increment, but that // may be beyond the bounds of the outline steps/ due to wrap-around, to // so we use % step_length everywhere, except for start_index. int end_index = lastpt->start_step + lastpt->step_count; if (end_index <= start_index) end_index += step_length; LLSQ prev_points; LLSQ prev_dirs; FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index); denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos); LLSQ points; LLSQ dirs; FCOORD normed_pos(0.0f, 0.0f); int index = GatherPoints(outline, feature_length, denorm, root_denorm, start_index, end_index, &pos, &normed_pos, &points, &dirs); while (index <= end_index) { // At each iteration we nominally have 3 accumulated sets of points and // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them // into sum_points/dirs, but we don't necessarily get any features out, // so if that is the case, we keep accumulating instead of rotating the // accumulators. LLSQ next_points; LLSQ next_dirs; FCOORD next_normed_pos(0.0f, 0.0f); index = GatherPoints(outline, feature_length, denorm, root_denorm, index, end_index, &pos, &next_normed_pos, &next_points, &next_dirs); LLSQ sum_points(prev_points); // TODO(rays) find out why it is better to use just dirs and next_dirs // in sum_dirs, instead of using prev_dirs as well. LLSQ sum_dirs(dirs); sum_points.add(points); sum_points.add(next_points); sum_dirs.add(next_dirs); bool made_features = false; // If we have some points, we can try making some features. if (sum_points.count() > 0) { // We have gone far enough from the start. Make a feature and restart. FCOORD fit_pt = sum_points.mean_point(); FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs, prev_normed_pos, normed_pos); // The segment to which we fit features is the line passing through // fit_pt in direction of fit_vector that starts nearest to // prev_normed_pos and ends nearest to normed_pos. FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt, fit_vector); FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector); // Possible correction to match the adjacent polygon segment. if (total_features == 0 && startpt != endpt) { FCOORD poly_pos(startpt->pos.x, startpt->pos.y); denorm.LocalNormTransform(poly_pos, &start_pos); } if (index > end_index && startpt != endpt) { FCOORD poly_pos(endpt->pos.x, endpt->pos.y); denorm.LocalNormTransform(poly_pos, &end_pos); } int num_features = ComputeFeatures(start_pos, end_pos, feature_length, features); if (num_features > 0) { // We made some features so shuffle the accumulators. prev_points = points; prev_dirs = dirs; prev_normed_pos = normed_pos; points = next_points; dirs = next_dirs; made_features = true; total_features += num_features; } // The end of the next set becomes the end next time around. normed_pos = next_normed_pos; } if (!made_features) { // We didn't make any features, so keep the prev accumulators and // add the next ones into the current. points.add(next_points); dirs.add(next_dirs); } } } else { // There is no outline, so we are forced to use the polygonal approximation. const EDGEPT* pt = startpt; do { FCOORD start_pos(pt->pos.x, pt->pos.y); FCOORD end_pos(pt->next->pos.x, pt->next->pos.y); denorm.LocalNormTransform(start_pos, &start_pos); denorm.LocalNormTransform(end_pos, &end_pos); ComputeFeatures(start_pos, end_pos, feature_length, features); } while ((pt = pt->next) != endpt); } }
// Collects edges into the given bounding box, LLSQ accumulator and/or x_coords, // y_coords vectors. // For a description of x_coords/y_coords, see GetEdgeCoords above. // Startpt to lastpt, inclusive, MUST have the same src_outline member, // which may be NULL. The vector from lastpt to its next is included in // the accumulation. Hidden edges should be excluded by the caller. // The input denorm should be the normalizations that have been applied from // the image to the current state of the TBLOB from which startpt, lastpt come. // box is the bounding box of the blob from which the EDGEPTs are taken and // indices into x_coords, y_coords are offset by box.botleft(). static void CollectEdgesOfRun(const EDGEPT* startpt, const EDGEPT* lastpt, const DENORM& denorm, const TBOX& box, TBOX* bounding_box, LLSQ* accumulator, GenericVector<GenericVector<int> > *x_coords, GenericVector<GenericVector<int> > *y_coords) { const C_OUTLINE* outline = startpt->src_outline; int x_limit = box.width() - 1; int y_limit = box.height() - 1; if (outline != NULL) { // Use higher-resolution edge points stored on the outline. // The outline coordinates may not match the binary image because of the // rotation for vertical text lines, but the root_denorm IS the matching // start of the DENORM chain. const DENORM* root_denorm = denorm.RootDenorm(); int step_length = outline->pathlength(); int start_index = startpt->start_step; // Note that if this run straddles the wrap-around point of the outline, // that lastpt->start_step may have a lower index than startpt->start_step, // and we want to use an end_index that allows us to use a positive // increment, so we add step_length if necessary, but that may be beyond the // bounds of the outline steps/ due to wrap-around, so we use % step_length // everywhere, except for start_index. int end_index = lastpt->start_step + lastpt->step_count; if (end_index <= start_index) end_index += step_length; // pos is the integer coordinates of the binary image steps. ICOORD pos = outline->position_at_index(start_index); FCOORD origin(box.left(), box.bottom()); // f_pos is a floating-point version of pos that offers improved edge // positioning using greyscale information or smoothing of edge steps. FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, start_index); // pos_normed is f_pos after the appropriate normalization, and relative // to origin. // prev_normed is the previous value of pos_normed. FCOORD prev_normed; denorm.NormTransform(root_denorm, f_pos, &prev_normed); prev_normed -= origin; for (int index = start_index; index < end_index; ++index) { ICOORD step = outline->step(index % step_length); // Only use the point if its edge strength is positive. This excludes // points that don't provide useful information, eg // ___________ // |___________ // The vertical step provides only noisy, damaging information, as even // with a greyscale image, the positioning of the edge there may be a // fictitious extrapolation, so previous processing has eliminated it. if (outline->edge_strength_at_index(index % step_length) > 0) { FCOORD f_pos = outline->sub_pixel_pos_at_index(pos, index % step_length); FCOORD pos_normed; denorm.NormTransform(root_denorm, f_pos, &pos_normed); pos_normed -= origin; // Accumulate the information that is selected by the caller. if (bounding_box != NULL) { SegmentBBox(pos_normed, prev_normed, bounding_box); } if (accumulator != NULL) { SegmentLLSQ(pos_normed, prev_normed, accumulator); } if (x_coords != NULL && y_coords != NULL) { SegmentCoords(pos_normed, prev_normed, x_limit, y_limit, x_coords, y_coords); } prev_normed = pos_normed; } pos += step; } } else { // There is no outline, so we are forced to use the polygonal approximation. const EDGEPT* endpt = lastpt->next; const EDGEPT* pt = startpt; do { FCOORD next_pos(pt->next->pos.x - box.left(), pt->next->pos.y - box.bottom()); FCOORD pos(pt->pos.x - box.left(), pt->pos.y - box.bottom()); if (bounding_box != NULL) { SegmentBBox(next_pos, pos, bounding_box); } if (accumulator != NULL) { SegmentLLSQ(next_pos, pos, accumulator); } if (x_coords != NULL && y_coords != NULL) { SegmentCoords(next_pos, pos, x_limit, y_limit, x_coords, y_coords); } } while ((pt = pt->next) != endpt); } }