// Helper returns the mean pixel value over the line between the start_pt and // end_pt (inclusive), but shifted perpendicular to the line in the projection // image by offset pixels. For simplicity, it is assumed that the vector is // either nearly horizontal or nearly vertical. It works on skewed textlines! // The end points are in external coordinates, and will be denormalized with // the denorm if not NULL before further conversion to pix coordinates. // After all the conversions, the offset is added to the direction // perpendicular to the line direction. The offset is thus in projection image // coordinates, which allows the caller to get a guaranteed displacement // between pixels used to calculate gradients. int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm, int offset, TPOINT start_pt, TPOINT end_pt) const { TransformToPixCoords(denorm, &start_pt); TransformToPixCoords(denorm, &end_pt); TruncateToImageBounds(&start_pt); TruncateToImageBounds(&end_pt); int wpl = pixGetWpl(pix_); uint32_t* data = pixGetData(pix_); int total = 0; int count = 0; int x_delta = end_pt.x - start_pt.x; int y_delta = end_pt.y - start_pt.y; if (abs(x_delta) >= abs(y_delta)) { if (x_delta == 0) return 0; // Horizontal line. Add the offset vertically. int x_step = x_delta > 0 ? 1 : -1; // Correct offset for rotation, keeping it anti-clockwise of the delta. offset *= x_step; start_pt.y += offset; end_pt.y += offset; TruncateToImageBounds(&start_pt); TruncateToImageBounds(&end_pt); x_delta = end_pt.x - start_pt.x; y_delta = end_pt.y - start_pt.y; count = x_delta * x_step + 1; for (int x = start_pt.x; x != end_pt.x; x += x_step) { int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta); total += GET_DATA_BYTE(data + wpl * y, x); } } else { // Vertical line. Add the offset horizontally. int y_step = y_delta > 0 ? 1 : -1; // Correct offset for rotation, keeping it anti-clockwise of the delta. // Pix holds the image with y=0 at the top, so the offset is negated. offset *= -y_step; start_pt.x += offset; end_pt.x += offset; TruncateToImageBounds(&start_pt); TruncateToImageBounds(&end_pt); x_delta = end_pt.x - start_pt.x; y_delta = end_pt.y - start_pt.y; count = y_delta * y_step + 1; for (int y = start_pt.y; y != end_pt.y; y += y_step) { int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta); total += GET_DATA_BYTE(data + wpl * y, x); } } return DivRounded(total, count); }
// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords. INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x, int y, int theta) const { INT_FEATURE_STRUCT pos( (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_, (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_, DivRounded(theta * kIntFeatureExtent, theta_buckets_)); return pos; }
// Returns an INT_FEATURE_STRUCT corresponding to the given bucket coords. INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x, int y, int theta) const { INT_FEATURE_STRUCT pos = { static_cast<uinT8>(ClipToRange( (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_, 0, MAX_UINT8)), static_cast<uinT8>(ClipToRange( (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_, 0, MAX_UINT8)), static_cast<uinT8>(ClipToRange( DivRounded(theta * kIntFeatureExtent, theta_buckets_), 0, MAX_UINT8))}; return pos; }
// Returns a new x-height maximally compatible with the result in word_res. // See comment above for overall algorithm. float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res) { STATS top_stats(0, MAX_UINT8); TBLOB* blob = word_res->rebuild_word->blobs; int blob_id = 0; for (; blob != NULL; blob = blob->next, ++blob_id) { UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { int top = blob->bounding_box().top(); // Clip the top to the limit of normalized feature space. if (top >= INT_FEAT_RANGE) top = INT_FEAT_RANGE - 1; int bottom = blob->bounding_box().bottom(); int min_bottom, max_bottom, min_top, max_top; unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top); // Chars with a wild top range would mess up the result so ignore them. if (max_top - min_top > kMaxCharTopRange) continue; int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top, top - (max_top + x_ht_acceptance_tolerance)); int height = top - kBlnBaselineOffset; if (debug_x_ht_level >= 20) { tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d : ", unicharset.id_to_unichar(class_id), height, min_bottom, max_bottom, min_top, max_top, bottom, top); } // Use only chars that fit in the expected bottom range, and where // the range of tops is sensibly near the xheight. if (min_bottom <= bottom + x_ht_acceptance_tolerance && bottom - x_ht_acceptance_tolerance <= max_bottom && min_top > kBlnBaselineOffset && max_top - kBlnBaselineOffset >= kBlnXHeight && misfit_dist > 0) { // Compute the x-height position using proportionality between the // actual height and expected height. int min_xht = DivRounded(height * kBlnXHeight, max_top - kBlnBaselineOffset); int max_xht = DivRounded(height * kBlnXHeight, min_top - kBlnBaselineOffset); if (debug_x_ht_level >= 20) { tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht); } // The range of expected heights gets a vote equal to the distance // of the actual top from the expected top. for (int y = min_xht; y <= max_xht; ++y) top_stats.add(y, misfit_dist); } else if (debug_x_ht_level >= 20) { tprintf(" already OK\n"); } } } if (top_stats.get_total() == 0) return 0.0f; // The new xheight is just the median vote, which is then scaled out // of BLN space back to pixel space to get the x-height in pixel space. float new_xht = top_stats.median(); if (debug_x_ht_level >= 20) { tprintf("Median xht=%f\n", new_xht); tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", new_xht, new_xht / word_res->denorm.y_scale()); } // The xheight must change by at least x_ht_min_change to be used. if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) return new_xht / word_res->denorm.y_scale(); else return 0.0f; }
// Returns a new x-height maximally compatible with the result in word_res. // See comment above for overall algorithm. float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float* baseline_shift) { STATS top_stats(0, MAX_UINT8); STATS shift_stats(-MAX_UINT8, MAX_UINT8); int bottom_shift = 0; int num_blobs = word_res->rebuild_word->NumBlobs(); do { top_stats.clear(); shift_stats.clear(); for (int blob_id = 0; blob_id < num_blobs; ++blob_id) { TBLOB* blob = word_res->rebuild_word->blobs[blob_id]; UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { int top = blob->bounding_box().top() + bottom_shift; // Clip the top to the limit of normalized feature space. if (top >= INT_FEAT_RANGE) top = INT_FEAT_RANGE - 1; int bottom = blob->bounding_box().bottom() + bottom_shift; int min_bottom, max_bottom, min_top, max_top; unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top); // Chars with a wild top range would mess up the result so ignore them. if (max_top - min_top > kMaxCharTopRange) continue; int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top, top - (max_top + x_ht_acceptance_tolerance)); int height = top - kBlnBaselineOffset; if (debug_x_ht_level >= 2) { tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ", unicharset.id_to_unichar(class_id), height, min_bottom, max_bottom, min_top, max_top, bottom, top); } // Use only chars that fit in the expected bottom range, and where // the range of tops is sensibly near the xheight. if (min_bottom <= bottom + x_ht_acceptance_tolerance && bottom - x_ht_acceptance_tolerance <= max_bottom && min_top > kBlnBaselineOffset && max_top - kBlnBaselineOffset >= kBlnXHeight && misfit_dist > 0) { // Compute the x-height position using proportionality between the // actual height and expected height. int min_xht = DivRounded(height * kBlnXHeight, max_top - kBlnBaselineOffset); int max_xht = DivRounded(height * kBlnXHeight, min_top - kBlnBaselineOffset); if (debug_x_ht_level >= 2) { tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht); } // The range of expected heights gets a vote equal to the distance // of the actual top from the expected top. for (int y = min_xht; y <= max_xht; ++y) top_stats.add(y, misfit_dist); } else if ((min_bottom > bottom + x_ht_acceptance_tolerance || bottom - x_ht_acceptance_tolerance > max_bottom) && bottom_shift == 0) { // Get the range of required bottom shift. int min_shift = min_bottom - bottom; int max_shift = max_bottom - bottom; if (debug_x_ht_level >= 2) { tprintf(" bottom shift min=%d, max=%d\n", min_shift, max_shift); } // The range of expected shifts gets a vote equal to the min distance // of the actual bottom from the expected bottom, spread over the // range of its acceptance. int misfit_weight = abs(min_shift); if (max_shift > min_shift) misfit_weight /= max_shift - min_shift; for (int y = min_shift; y <= max_shift; ++y) shift_stats.add(y, misfit_weight); } else { if (bottom_shift == 0) { // Things with bottoms that are already ok need to say so, on the // 1st iteration only. shift_stats.add(0, kBlnBaselineOffset); } if (debug_x_ht_level >= 2) { tprintf(" already OK\n"); } } } } if (shift_stats.get_total() > top_stats.get_total()) { bottom_shift = IntCastRounded(shift_stats.median()); if (debug_x_ht_level >= 2) { tprintf("Applying bottom shift=%d\n", bottom_shift); } } } while (bottom_shift != 0 && top_stats.get_total() < shift_stats.get_total()); // Baseline shift is opposite sign to the bottom shift. *baseline_shift = -bottom_shift / word_res->denorm.y_scale(); if (debug_x_ht_level >= 2) { tprintf("baseline shift=%g\n", *baseline_shift); } if (top_stats.get_total() == 0) return bottom_shift != 0 ? word_res->x_height : 0.0f; // The new xheight is just the median vote, which is then scaled out // of BLN space back to pixel space to get the x-height in pixel space. float new_xht = top_stats.median(); if (debug_x_ht_level >= 2) { tprintf("Median xht=%f\n", new_xht); tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", new_xht, new_xht / word_res->denorm.y_scale()); } // The xheight must change by at least x_ht_min_change to be used. if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) return new_xht / word_res->denorm.y_scale(); else return bottom_shift != 0 ? word_res->x_height : 0.0f; }