Beispiel #1
0
// Returns the x,y means as an FCOORD.
FCOORD LLSQ::mean_point() const {
  if (total_weight > 0.0) {
    return FCOORD(sigx / total_weight, sigy / total_weight);
  } else {
    return FCOORD(0.0f, 0.0f);
  }
}
Beispiel #2
0
void make_words(                             //make words
                ICOORD page_tr,              //top right
                float gradient,              //page skew
                BLOCK_LIST *blocks,          //block list
                TO_BLOCK_LIST *land_blocks,  //rotated for landscape
                TO_BLOCK_LIST *port_blocks,  //output list
                tesseract::Tesseract* tess
               ) {
  TO_BLOCK_IT block_it;          //iterator
  TO_BLOCK *block;               //current block;

  compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD (0.0f, -1.0f),
                      !(BOOL8) textord_test_landscape, tess);
  if (global_monitor != NULL) {
    global_monitor->ocr_alive = TRUE;
    global_monitor->progress = 25;
  }
  to_spacing(page_tr, port_blocks);
  block_it.set_to_list (port_blocks);
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
  block_it.forward ()) {
    block = block_it.data ();
    //              set_row_spaces(block,FCOORD(1,0),!(BOOL8)textord_test_landscape);
                                 //make proper classes
    make_real_words (block, FCOORD (1.0f, 0.0f));
  }
}
void OUTLINE::scale(                     // scale OUTLINE
                    const FCOORD vector  //by fcoord
                   ) {
                                 //child outline itertr
  OUTLINE_IT child_it(&children);
  POLYPT_IT poly_it(&outline);  //outline point itertr
  POLYPT *pt;

  box.scale (vector);

  start.set_x ((inT16) floor (start.x () * vector.x () + 0.5));
  // ?? Why ICOORD?
  start.set_y ((inT16) floor (start.y () * vector.y () + 0.5));
  // ?? Why ICOORD?

  for (poly_it.mark_cycle_pt (); !poly_it.cycled_list (); poly_it.forward ()) {
    pt = poly_it.data ();
    pt->pos =
      FCOORD (pt->pos.x () * vector.x (), pt->pos.y () * vector.y ());
    pt->vec =
      FCOORD (pt->vec.x () * vector.x (), pt->vec.y () * vector.y ());
  }

  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
    child_it.forward ())
                                 //scale child outlines
  child_it.data ()->scale (vector);
}
Beispiel #4
0
void draw_meanlines(                  //draw a block
        TO_BLOCK *block,  //block to draw
        float gradient,   //gradients of lines
        inT32 left,       //edge of block
        ScrollView::Color colour,    //colour to draw in
        FCOORD rotation   //rotation for line
) {
    FCOORD plot_pt;                //point to plot
    //rows
    TO_ROW_IT row_it = block->get_rows();
    TO_ROW *row;                   //current row
    BLOBNBOX_IT blob_it;           //blobs
    float right;                   //end of row
    to_win->Pen(colour);
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        row = row_it.data();
        blob_it.set_to_list(row->blob_list());
        blob_it.move_to_last();
        right = blob_it.data()->bounding_box().right();
        plot_pt =
                FCOORD((float) left,
                       gradient * left + row->parallel_c() + row->xheight);
        plot_pt.rotate(rotation);
        to_win->SetCursor(plot_pt.x(), plot_pt.y());
        plot_pt =
                FCOORD((float) right,
                       gradient * right + row->parallel_c() + row->xheight);
        plot_pt.rotate(rotation);
        to_win->DrawTo(plot_pt.x(), plot_pt.y());
    }
}
Beispiel #5
0
void plot_to_row(                 //draw a row
        TO_ROW *row,     //row to draw
        ScrollView::Color colour,   //colour to draw in
        FCOORD rotation  //rotation for line
) {
    FCOORD plot_pt;                //point to plot
    //blobs
    BLOBNBOX_IT it = row->blob_list();
    float left, right;             //end of row

    if (it.empty()) {
        tprintf("No blobs in row at %g\n", row->parallel_c());
        return;
    }
    left = it.data()->bounding_box().left();
    it.move_to_last();
    right = it.data()->bounding_box().right();
    plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
    to_win->Pen(colour);
    plot_pt = FCOORD(left, row->line_m() * left + row->line_c());
    plot_pt.rotate(rotation);
    to_win->SetCursor(plot_pt.x(), plot_pt.y());
    plot_pt = FCOORD(right, row->line_m() * right + row->line_c());
    plot_pt.rotate(rotation);
    to_win->DrawTo(plot_pt.x(), plot_pt.y());
}
Beispiel #6
0
void plot_parallel_row(                 //draw a row
        TO_ROW *row,     //row to draw
        float gradient,  //gradients of lines
        inT32 left,      //edge of block
        ScrollView::Color colour,   //colour to draw in
        FCOORD rotation  //rotation for line
) {
    FCOORD plot_pt;                //point to plot
    //blobs
    BLOBNBOX_IT it = row->blob_list();
    float fleft = (float) left;    //floating version
    float right;                   //end of row

    //      left=it.data()->bounding_box().left();
    it.move_to_last();
    right = it.data()->bounding_box().right();
    plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
    to_win->Pen(colour);
    plot_pt = FCOORD(fleft, gradient * left + row->max_y());
    plot_pt.rotate(rotation);
    to_win->SetCursor(plot_pt.x(), plot_pt.y());
    plot_pt = FCOORD(fleft, gradient * left + row->min_y());
    plot_pt.rotate(rotation);
    to_win->DrawTo(plot_pt.x(), plot_pt.y());
    plot_pt = FCOORD(fleft, gradient * left + row->parallel_c());
    plot_pt.rotate(rotation);
    to_win->SetCursor(plot_pt.x(), plot_pt.y());
    plot_pt = FCOORD(right, gradient * right + row->parallel_c());
    plot_pt.rotate(rotation);
    to_win->DrawTo(plot_pt.x(), plot_pt.y());
}
void OUTLINE::compute_bb() {  //constructor
  ICOORD ibl, itr;               //integer bb
  FCOORD botleft;                //bounding box
  FCOORD topright;
  FCOORD pos;                    //current pos;
  POLYPT_IT polypts = &outline;  //iterator

  botleft = polypts.data ()->pos;
  topright = botleft;
  start = ICOORD ((inT16) botleft.x (), (inT16) botleft.y ());
  do {
    pos = polypts.data ()->pos;
    if (pos.x () < botleft.x ())
                                 //get bounding box
      botleft = FCOORD (pos.x (), botleft.y ());
    if (pos.y () < botleft.y ())
      botleft = FCOORD (botleft.x (), pos.y ());
    if (pos.x () > topright.x ())
      topright = FCOORD (pos.x (), topright.y ());
    if (pos.y () > topright.y ())
      topright = FCOORD (topright.x (), pos.y ());
    polypts.forward ();
  }
  while (!polypts.at_first ());
  ibl = ICOORD ((inT16) botleft.x (), (inT16) botleft.y ());
  itr = ICOORD ((inT16) topright.x () + 1, (inT16) topright.y () + 1);
  box = TBOX (ibl, itr);
}
BOOL8
OUTLINE::operator< (             //winding number
OUTLINE & other                  //other outline
) {
  inT16 count;                   //winding count
  POLYPT_IT it = &outline;       //iterator

  if (!box.overlap (other.box))
    return FALSE;                //can't be contained

  do {
    count = other.winding_number (FCOORD (it.data ()->pos));
    //get winding number
    if (count != INTERSECTING)
      return count != 0;
    it.forward ();
  }
  while (!it.at_first ());

                                 //switch lists
  it.set_to_list (&other.outline);
  do {
                                 //try other way round
    count = winding_number (FCOORD (it.data ()->pos));
    if (count != INTERSECTING)
      return count == 0;
    it.forward ();
  }
  while (!it.at_first ());
  return TRUE;
}
/**********************************************************************
 * OUTLINE::OUTLINE
 *
 * Constructor to build a OUTLINE from a compact LOOP.
 **********************************************************************/
OUTLINE::OUTLINE (               //constructor
const ICOORD & startpt,          //start position
inT8 * compactloop,              //from Tess format
BOOL8 invert,                    //reverse it
ICOORD bot_left,                 //bounding box
ICOORD top_right):
box (bot_left, top_right),
start(startpt) {
  ICOORD pos;                    //current point
  ICOORD vec;                    //vector to next
  POLYPT *polypt;                //new point
  inT8 *vector;                  //compact loop
  POLYPT_IT it = &outline;       //iterator

  pos = startpt;
  vector = compactloop;
  do {
                                 //vector to next
    vec = ICOORD (*vector, *(vector + 1));
                                 //make a new one
    polypt = new POLYPT (FCOORD (pos), FCOORD (vec));
                                 //add to list
    it.add_after_then_move (polypt);
    pos += vec;                  //move to next
    vector += 2;
  }
  while (pos != startpt);
  if (invert)
    reverse();  //now reverse it
}
void Tesseract::Clear() {
#ifdef HAVE_LIBLEPT
  if (pix_binary_ != NULL)
    pixDestroy(&pix_binary_);
#endif
 deskew_ = FCOORD(1.0f, 0.0f);
 reskew_ = FCOORD(1.0f, 0.0f);
}
OUTLINE *make_ed_outline(                     //constructoutline
                         FRAGMENT_LIST *list  //list of fragments
                        ) {
  FRAGMENT *fragment;            //current fragment
  EDGEPT *edgept;                //current point
  ICOORD headpos;                //coords of head
  ICOORD tailpos;                //coords of tail
  FCOORD pos;                    //coords of edgept
  FCOORD vec;                    //empty
  POLYPT *polypt;                //current point
  POLYPT_LIST poly_list;         //list of point
  POLYPT_IT poly_it = &poly_list;//iterator
  FRAGMENT_IT fragment_it = list;//fragment

  headpos = fragment_it.data ()->head;
  do {
    fragment = fragment_it.data ();
    edgept = fragment->headpt;   //start of segment
    do {
      pos = FCOORD (edgept->pos.x, edgept->pos.y);
      vec = FCOORD (edgept->vec.x, edgept->vec.y);
      polypt = new POLYPT (pos, vec);
                                 //add to list
      poly_it.add_after_then_move (polypt);
      edgept = edgept->next;
    }
    while (edgept != fragment->tailpt);
    tailpos = ICOORD (edgept->pos.x, edgept->pos.y);
                                 //get rid of it
    delete fragment_it.extract ();
    if (tailpos != headpos) {
      if (fragment_it.empty ()) {
        return NULL;
      }
      fragment_it.forward ();
                                 //find next segment
      for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () &&
               fragment_it.data ()->head != tailpos;
        fragment_it.forward ());
      if (fragment_it.data ()->head != tailpos) {
        // It is legitimate for the heads to not all match to tails,
        // since not all combinations of seams always make sense.
        for (fragment_it.mark_cycle_pt ();
        !fragment_it.cycled_list (); fragment_it.forward ()) {
          fragment = fragment_it.extract ();
          delete fragment;
        }
        return NULL;             //can't do it
      }
    }
  }
  while (tailpos != headpos);
  return new OUTLINE (&poly_it); //turn to outline
}
Beispiel #12
0
void Tesseract::Clear() {
  pixDestroy(&pix_binary_);
  pixDestroy(&cube_binary_);
  pixDestroy(&pix_grey_);
  pixDestroy(&scaled_color_);
  deskew_ = FCOORD(1.0f, 0.0f);
  reskew_ = FCOORD(1.0f, 0.0f);
  splitter_.Clear();
  scaled_factor_ = -1;
  ResetFeaturesHaveBeenExtracted();
  for (int i = 0; i < sub_langs_.size(); ++i)
    sub_langs_[i]->Clear();
}
void PBLOB::baseline_denormalise(                      // Tess style BL Norm
                                 const DENORM *denorm  //antidote
                                ) {
  float blob_x_left;           // Left edge of blob.
  TBOX blob_box;                  //blob bounding box

  move(FCOORD (0.0f, 0.0f - bln_baseline_offset));
  blob_box = bounding_box ();
  blob_x_left = blob_box.left ();
  scale (1.0 / denorm->scale_at_x (blob_x_left));
  move (FCOORD (denorm->origin (),
    denorm->yshift_at_x (blob_x_left)));
}
void Tesseract::Clear() {
  pixDestroy(&pix_binary_);
  pixDestroy(&cube_binary_);
  pixDestroy(&pix_grey_);
  pixDestroy(&pix_thresholds_);
  pixDestroy(&scaled_color_);
  deskew_ = FCOORD(1.0f, 0.0f);
  reskew_ = FCOORD(1.0f, 0.0f);
  splitter_.Clear();
  scaled_factor_ = -1;
  for (int i = 0; i < sub_langs_.size(); ++i)
    sub_langs_[i]->Clear();
}
// Helper function to make a fake PBLOB formed from the bounding box
// of the given old-format outline.
static PBLOB* MakeRectBlob(TESSLINE* ol) {
  POLYPT_LIST poly_list;
  POLYPT_IT poly_it = &poly_list;
  FCOORD pos, vec;
  POLYPT *polypt;

  // Create points at each of the 4 corners of the rectangle in turn.
  pos = FCOORD(ol->topleft.x, ol->topleft.y);
  vec = FCOORD(0.0f, ol->botright.y - ol->topleft.y);
  polypt = new POLYPT(pos, vec);
  poly_it.add_after_then_move(polypt);
  pos = FCOORD(ol->topleft.x, ol->botright.y);
  vec = FCOORD(ol->botright.x - ol->topleft.x, 0.0f);
  polypt = new POLYPT(pos, vec);
  poly_it.add_after_then_move(polypt);
  pos = FCOORD(ol->botright.x, ol->botright.y);
  vec = FCOORD(0.0f, ol->topleft.y - ol->botright.y);
  polypt = new POLYPT(pos, vec);
  poly_it.add_after_then_move(polypt);
  pos = FCOORD(ol->botright.x, ol->topleft.y);
  vec = FCOORD(ol->topleft.x - ol->botright.x, 0.0f);
  polypt = new POLYPT(pos, vec);
  poly_it.add_after_then_move(polypt);

  OUTLINE_LIST out_list;
  OUTLINE_IT out_it = &out_list;
  out_it.add_after_then_move(new OUTLINE(&poly_it));
  return new PBLOB(&out_list);
}
PBLOB *PBLOB::baseline_normalise(                //normalize blob
                                 ROW *row,       //row it came from
                                 DENORM *denorm  //inverse mapping
                                ) {
  TBOX blob_box = bounding_box ();
  float x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
  PBLOB *bn_blob;                //copied blob

  *denorm = DENORM (x_centre, bln_x_height / row->x_height (), row);
  bn_blob = new PBLOB;           //get one
  *bn_blob = *this;              //deep copy
  bn_blob->move (FCOORD (-denorm->origin (), -row->base_line (x_centre)));
  bn_blob->scale (denorm->scale ());
  bn_blob->move (FCOORD (0.0, bln_baseline_offset));
  return bn_blob;
}
void PBLOB::rotate() {  // Rotate 90 deg anti
  OUTLINE_IT it(&outlines);  // iterator

  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
    it.data ()->rotate (FCOORD(0.0f, 1.0f));     // rotate each outline
  }
}
Beispiel #18
0
void make_words(tesseract::Textord *textord,
                ICOORD page_tr,                // top right
                float gradient,                // page skew
                BLOCK_LIST *blocks,            // block list
                TO_BLOCK_LIST *port_blocks) {  // output list
  TO_BLOCK_IT block_it;          // iterator
  TO_BLOCK *block;               // current block

  compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
                      !(BOOL8) textord_test_landscape);
  textord->to_spacing(page_tr, port_blocks);
  block_it.set_to_list(port_blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    block = block_it.data();
    make_real_words(textord, block, FCOORD(1.0f, 0.0f));
  }
}
/**********************************************************************
 * make_rotated_tess_blob
 *
 * Make a single Tess style blob, applying the given rotation and
 * renormalizing.
 **********************************************************************/
TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
                              BOOL8 flatten) {
  if (denorm != NULL && denorm->block() != NULL &&
      denorm->block()->classify_rotation().y() != 0.0) {
    TBOX box = blob->bounding_box();
    int src_width = box.width();
    int src_height = box.height();
    src_width = static_cast<int>(src_width / denorm->scale() + 0.5);
    src_height = static_cast<int>(src_height / denorm->scale() + 0.5);
    int x_middle = (box.left() + box.right()) / 2;
    int y_middle = (box.top() + box.bottom()) / 2;
    PBLOB* rotated_blob = PBLOB::deep_copy(blob);
    rotated_blob->move(FCOORD(-x_middle, -y_middle));
    rotated_blob->rotate(denorm->block()->classify_rotation());
    ICOORD median_size = denorm->block()->median_size();
    int tolerance = median_size.x() / 8;
    // TODO(dsl/rays) find a better normalization solution. In the mean time
    // make it work for CJK by normalizing for Cap height in the same way
    // as is applied in compute_block_xheight when the row is presumed to
    // be ALLCAPS, i.e. the x-height is the fixed fraction
    // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
    if (NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
        NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance)) {
      float target_height = bln_x_height * (textord_merge_x + textord_merge_asc)
                          / textord_merge_x;
      rotated_blob->scale(target_height / box.width());
      rotated_blob->move(FCOORD(0.0f,
                                bln_baseline_offset -
                                  rotated_blob->bounding_box().bottom()));
    }
    TBLOB* result = make_tess_blob(rotated_blob, flatten);
    delete rotated_blob;
    return result;
  } else {
    return make_tess_blob(blob, flatten);
  }
}
Beispiel #20
0
// Returns the direction of the fitted line as a unit vector, using the
// least mean squared perpendicular distance. The line runs through the
// mean_point, i.e. a point p on the line is given by:
// p = mean_point() + lambda * vector_fit() for some real number lambda.
// Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous
// and may be negated without changing its meaning.
FCOORD LLSQ::vector_fit() const {
  double x_var = x_variance();
  double y_var = y_variance();
  double covar = covariance();
  FCOORD result;
  if (x_var >= y_var) {
    if (x_var == 0.0)
      return FCOORD(0.0f, 0.0f);
    result.set_x(x_var / sqrt(x_var * x_var + covar * covar));
    result.set_y(sqrt(1.0 - result.x() * result.x()));
  } else {
    result.set_y(y_var / sqrt(y_var * y_var + covar * covar));
    result.set_x(sqrt(1.0 - result.y() * result.y()));
  }
  if (covar < 0.0)
    result.set_y(-result.y());
  return result;
}
Beispiel #21
0
// Copies the bounding box from page_res_it->word() to the given TBOX.
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
  while (page_res_it->block() != NULL && page_res_it->word() == NULL)
    page_res_it->forward();

  if (page_res_it->word() != NULL) {
    *tbox = page_res_it->word()->word->bounding_box();

    // If tbox->left() is negative, the training image has vertical text and
    // all the coordinates of bounding boxes of page_res are rotated by 90
    // degrees in a counterclockwise direction. We need to rotate the TBOX back
    // in order to compare with the TBOXes of box files.
    if (tbox->left() < 0) {
      tbox->rotate(FCOORD(0.0, -1.0));
    }

    return true;
  } else {
    return false;
  }
}
void PBLOB::rotate() {  // Rotate 90 deg anti
  rotate(FCOORD(0.0f, 1.0f));
}
Beispiel #23
0
// Fits a straight baseline to the points. Returns true if it had enough
// points to be reasonably sure of the fitted baseline.
// If use_box_bottoms is false, baselines positions are formed by
// considering the outlines of the blobs.
bool BaselineRow::FitBaseline(bool use_box_bottoms) {
  // Deterministic fitting is used wherever possible.
  fitter_.Clear();
  // Linear least squares is a backup if the DetLineFit produces a bad line.
  LLSQ llsq;
  BLOBNBOX_IT blob_it(blobs_);

  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (!use_box_bottoms) blob->EstimateBaselinePosition();
    const TBOX& box = blob->bounding_box();
    int x_middle = (box.left() + box.right()) / 2;
#ifdef kDebugYCoord
    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
      tprintf("Box bottom = %d, baseline pos=%d for box at:",
              box.bottom(), blob->baseline_position());
      box.print();
    }
#endif
    fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
    llsq.add(x_middle, blob->baseline_position());
  }
  // Fit the line.
  ICOORD pt1, pt2;
  baseline_error_ = fitter_.Fit(&pt1, &pt2);
  baseline_pt1_ = pt1;
  baseline_pt2_ = pt2;
  if (baseline_error_ > max_baseline_error_ &&
      fitter_.SufficientPointsForIndependentFit()) {
    // The fit was bad but there were plenty of points, so try skipping
    // the first and last few, and use the new line if it dramatically improves
    // the error of fit.
    double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
    if (error < baseline_error_ / 2.0) {
      baseline_error_ = error;
      baseline_pt1_ = pt1;
      baseline_pt2_ = pt2;
    }
  }
  int debug = 0;
#ifdef kDebugYCoord
  Print();
  debug = bounding_box_.bottom() < kDebugYCoord &&
      bounding_box_.top() > kDebugYCoord
            ? 3 : 2;
#endif
  // Now we obtained a direction from that fit, see if we can improve the
  // fit using the same direction and some other start point.
  FCOORD direction(pt2 - pt1);
  double target_offset = direction * pt1;
  good_baseline_ = false;
  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
  // Wild lines can be produced because DetLineFit allows vertical lines, but
  // vertical text has been rotated so angles over pi/4 should be disallowed.
  // Near vertical lines can still be produced by vertically aligned components
  // on very short lines.
  double angle = BaselineAngle();
  if (fabs(angle) > M_PI * 0.25) {
    // Use the llsq fit as a backup.
    baseline_pt1_ = llsq.mean_point();
    baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
    // TODO(rays) get rid of this when m and c are no longer used.
    double m = llsq.m();
    double c = llsq.c(m);
    baseline_error_ = llsq.rms(m, c);
    good_baseline_ = false;
  }
  return good_baseline_;
}
Beispiel #24
0
C_OUTLINE::C_OUTLINE(                     //constructor
                     C_OUTLINE *srcline,  //outline to
                     FCOORD rotation      //rotate
                    ) : offsets(NULL) {
  TBOX new_box;                   //easy bounding
  inT16 stepindex;               //index to step
  inT16 dirdiff;                 //direction change
  ICOORD pos;                    //current position
  ICOORD prevpos;                //previous dest point

  ICOORD destpos;                //destination point
  inT16 destindex;               //index to step
  DIR128 dir;                    //coded direction
  uinT8 new_step;

  stepcount = srcline->stepcount * 2;
  if (stepcount == 0) {
    steps = NULL;
    box = srcline->box;
    box.rotate(rotation);
    return;
  }
                                 //get memory
  steps = (uinT8 *) alloc_mem (step_mem());
  memset(steps, 0, step_mem());

  for (int iteration = 0; iteration < 2; ++iteration) {
    DIR128 round1 = iteration == 0 ? 32 : 0;
    DIR128 round2 = iteration != 0 ? 32 : 0;
    pos = srcline->start;
    prevpos = pos;
    prevpos.rotate (rotation);
    start = prevpos;
    box = TBOX (start, start);
    destindex = 0;
    for (stepindex = 0; stepindex < srcline->stepcount; stepindex++) {
      pos += srcline->step (stepindex);
      destpos = pos;
      destpos.rotate (rotation);
      //  tprintf("%i %i %i %i ", destpos.x(), destpos.y(), pos.x(), pos.y());
      while (destpos.x () != prevpos.x () || destpos.y () != prevpos.y ()) {
        dir = DIR128 (FCOORD (destpos - prevpos));
        dir += 64;                 //turn to step style
        new_step = dir.get_dir ();
        //  tprintf(" %i\n", new_step);
        if (new_step & 31) {
          set_step(destindex++, dir + round1);
          prevpos += step(destindex - 1);
          if (destindex < 2
            || ((dirdiff =
            step_dir (destindex - 1) - step_dir (destindex - 2)) !=
            -64 && dirdiff != 64)) {
            set_step(destindex++, dir + round2);
            prevpos += step(destindex - 1);
          } else {
            prevpos -= step(destindex - 1);
            destindex--;
            prevpos -= step(destindex - 1);
            set_step(destindex - 1, dir + round2);
            prevpos += step(destindex - 1);
          }
        }
        else {
          set_step(destindex++, dir);
          prevpos += step(destindex - 1);
        }
        while (destindex >= 2 &&
               ((dirdiff =
                 step_dir (destindex - 1) - step_dir (destindex - 2)) == -64 ||
                dirdiff == 64)) {
          prevpos -= step(destindex - 1);
          prevpos -= step(destindex - 2);
          destindex -= 2;        // Forget u turn
        }
        //ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == destpos.y());
        new_box = TBOX (destpos, destpos);
        box += new_box;
      }
    }
    ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
    dirdiff = step_dir (destindex - 1) - step_dir (0);
    while ((dirdiff == 64 || dirdiff == -64) && destindex > 1) {
      start += step (0);
      destindex -= 2;
      for (int i = 0; i < destindex; ++i)
        set_step(i, step_dir(i + 1));
      dirdiff = step_dir (destindex - 1) - step_dir (0);
    }
    if (destindex >= 4)
      break;
  }
  ASSERT_HOST(destindex <= stepcount);
  stepcount = destindex;
  destpos = start;
  for (stepindex = 0; stepindex < stepcount; stepindex++) {
    destpos += step (stepindex);
  }
  ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
}
Beispiel #25
0
/**
 * Segment the page according to the current value of tessedit_pageseg_mode.
 * pix_binary_ is used as the source image and should not be NULL.
 * On return the blocks list owns all the constructed page layout.
 */
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
                           Tesseract* osd_tess, OSResults* osr) {
  ASSERT_HOST(pix_binary_ != NULL);
  int width = pixGetWidth(pix_binary_);
  int height = pixGetHeight(pix_binary_);
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast<PageSegMode>(
      static_cast<int>(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block->set_right_to_left(right_to_left());
    block_it.add_to_end(block);
  } else {
    // UNLV file present. Use PSM_SINGLE_BLOCK.
    pageseg_mode = PSM_SINGLE_BLOCK;
  }
  // The diacritic_blobs holds noise blobs that may be diacritics. They
  // are separated out on areas of the image that seem noisy and short-circuit
  // the layout process, going straight from the initial partition creation
  // right through to after word segmentation, where they are added to the
  // rej_cblobs list of the most appropriate word. From there classification
  // will determine whether they are used.
  BLOBNBOX_LIST diacritic_blobs;
  int auto_page_seg_ret_val = 0;
  TO_BLOCK_LIST to_blocks;
  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
      PSM_SPARSE(pageseg_mode)) {
    auto_page_seg_ret_val = AutoPageSeg(
        pageseg_mode, blocks, &to_blocks,
        enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);
    if (pageseg_mode == PSM_OSD_ONLY)
      return auto_page_seg_ret_val;
    // To create blobs from the image region bounds uncomment this line:
    //  to_blocks.clear();  // Uncomment to go back to the old mode.
  } else {
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
    if (pageseg_mode == PSM_CIRCLE_WORD) {
      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
      if (pixcleaned != NULL) {
        pixDestroy(&pix_binary_);
        pix_binary_ = pixcleaned;
      }
    }
  }

  if (auto_page_seg_ret_val < 0) {
    return -1;
  }

  if (blocks->empty()) {
    if (textord_debug_tabfind)
      tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }
  bool splitting =
      pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
  bool cjk_mode = textord_use_cjk_fp_model;

  textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
                       pix_thresholds_, pix_grey_, splitting || cjk_mode,
                       &diacritic_blobs, blocks, &to_blocks);
  return auto_page_seg_ret_val;
}
Beispiel #26
0
/**
 * Segment the page according to the current value of tessedit_pageseg_mode.
 * pix_binary_ is used as the source image and should not be NULL.
 * On return the blocks list owns all the constructed page layout.
 */
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
                           Tesseract* osd_tess, OSResults* osr) {
  ASSERT_HOST(pix_binary_ != NULL);
  int width = pixGetWidth(pix_binary_);
  int height = pixGetHeight(pix_binary_);
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast<PageSegMode>(
      static_cast<int>(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block->set_right_to_left(right_to_left());
    block_it.add_to_end(block);
  } else {
    // UNLV file present. Use PSM_SINGLE_BLOCK.
    pageseg_mode = PSM_SINGLE_BLOCK;
  }
  int auto_page_seg_ret_val = 0;
  TO_BLOCK_LIST to_blocks;
  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
      PSM_SPARSE(pageseg_mode)) {
    auto_page_seg_ret_val =
        AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr);
    if (pageseg_mode == PSM_OSD_ONLY)
      return auto_page_seg_ret_val;
    // To create blobs from the image region bounds uncomment this line:
    //  to_blocks.clear();  // Uncomment to go back to the old mode.
  } else {
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
    if (pageseg_mode == PSM_CIRCLE_WORD) {
      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
      if (pixcleaned != NULL) {
        pixDestroy(&pix_binary_);
        pix_binary_ = pixcleaned;
      }
    }
  }

  if (auto_page_seg_ret_val < 0) {
    return -1;
  }

  if (blocks->empty()) {
    if (textord_debug_tabfind)
      tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }

  textord_.TextordPage(pageseg_mode, width, height, pix_binary_,
                       blocks, &to_blocks);
  return auto_page_seg_ret_val;
}
Beispiel #27
0
// Segment the page according to the current value of tessedit_pageseg_mode.
// If the pix_binary_ member is not NULL, it is used as the source image,
// and copied to image, otherwise it just uses image as the input.
// On return the blocks list owns all the constructed page layout.
int Tesseract::SegmentPage(const STRING* input_file,
                           IMAGE* image, BLOCK_LIST* blocks) {
  int width = image->get_xsize();
  int height = image->get_ysize();
  int resolution = image->get_res();
#ifdef HAVE_LIBLEPT
  if (pix_binary_ != NULL) {
    width = pixGetWidth(pix_binary_);
    height = pixGetHeight(pix_binary_);
    resolution = pixGetXRes(pix_binary_);
  }
#endif
  // Zero resolution messes up the algorithms, so make sure it is credible.
  if (resolution < kMinCredibleResolution)
    resolution = kDefaultResolution;
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast<PageSegMode>(
      static_cast<int>(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (pageseg_mode != tesseract::PSM_AUTO &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  bool single_column = pageseg_mode > PSM_AUTO;
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block_it.add_to_end(block);
  } else {
    // UNLV file present. Use PSM_SINGLE_COLUMN.
    pageseg_mode = PSM_SINGLE_COLUMN;
  }

  TO_BLOCK_LIST land_blocks, port_blocks;
  TBOX page_box;
  if (pageseg_mode <= PSM_SINGLE_COLUMN) {
    if (AutoPageSeg(width, height, resolution, single_column,
                    image, blocks, &port_blocks) < 0) {
      return -1;
    }
    // To create blobs from the image region bounds uncomment this line:
    //  port_blocks.clear();  // Uncomment to go back to the old mode.
  } else {
#if HAVE_LIBLEPT
    image->FromPix(pix_binary_);
#endif
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
  }
  if (blocks->empty()) {
    tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }

  if (port_blocks.empty()) {
    // AutoPageSeg was not used, so we need to find_components first.
    find_components(blocks, &land_blocks, &port_blocks, &page_box);
  } else {
    // AutoPageSeg does not need to find_components as it did that already.
    page_box.set_left(0);
    page_box.set_bottom(0);
    page_box.set_right(width);
    page_box.set_top(height);
    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
    filter_blobs(page_box.topright(), &port_blocks, true);
  }

  TO_BLOCK_IT to_block_it(&port_blocks);
  ASSERT_HOST(!port_blocks.empty());
  TO_BLOCK* to_block = to_block_it.data();
  if (pageseg_mode <= PSM_SINGLE_BLOCK ||
      to_block->line_size < 2) {
    // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old
    // textord. The difference is the number of blocks and how the are made.
    textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks,
                 this);
  } else {
    // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
    float gradient = make_single_row(page_box.topright(),
                                     to_block, &port_blocks, this);
    if (pageseg_mode == PSM_SINGLE_LINE) {
      // SINGLE_LINE uses the old word maker on the single line.
      make_words(page_box.topright(), gradient, blocks,
                 &land_blocks, &port_blocks, this);
    } else {
      // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
      // single word, and in SINGLE_CHAR mode, all the outlines
      // go in a single blob.
      make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
                       to_block->get_rows(), to_block->block->row_list());
    }
  }
  return 0;
}