コード例 #1
0
ファイル: drawtord.cpp プロジェクト: mehulsbhatt/MyOCRTEST
/**
 * Draw the fixed-pitch segmentation points of a row as vertical lines.
 *
 * The vertical extent of every line is taken from a box accumulated over
 * the row's blob list (the current blob's bounding box plus every box
 * returned by box_next() until the iterator has cycled). One line is drawn
 * at the x position of each point in seg_list.
 *
 * Points flagged as faked are drawn in ScrollView::WHITE; all others are
 * drawn in the caller-supplied colour.
 *
 * @param win      window to draw in
 * @param colour   pen colour for non-faked segmentation points
 * @param row      row whose blobs give the vertical extent of the lines
 * @param seg_list segmentation points to plot
 */
void plot_fp_cells2(
        ScrollView *win,
        ScrollView::Color colour,
        TO_ROW *row,
        FPSEGPT_LIST *seg_list
) {
    FPSEGPT_IT seg_it = seg_list;
    BLOBNBOX_IT blob_it = row->blob_list();   // blobs in row

    // Accumulate the box that spans the blobs of the row.
    TBOX word_box = blob_it.data()->bounding_box();
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();)
        word_box += box_next(&blob_it);

    for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
        FPSEGPT *segpt = seg_it.data();
        // Select the pen per segment. The colour parameter must not be
        // overwritten here: doing so would make every segment after the
        // first faked one white as well.
        win->Pen(segpt->faked ? ScrollView::WHITE : colour);
        win->Line(segpt->position(), word_box.bottom(), segpt->position(), word_box.top());
    }
}
コード例 #2
0
ファイル: pitsync1.cpp プロジェクト: CDOcr/tesseract
/**
 * Fabricate segmentation points for a region in which no legal point
 * could be made.
 *
 * The x range to fill is derived from the lowest-cost points of prev_list:
 * the first point with the least cost gives the lower limit, the last point
 * with an equal cost gives the upper limit, and both are then advanced by
 * one pitch (minus/plus pitch_error). For every x in that range a new
 * FPSEGPT is built; those that link to a previous point are appended to
 * seg_list and marked as faked, the others are deleted.
 *
 * @param prev_list     segmentation points of the previous region
 * @param blob_box      box of the blob to start from (local copy, advanced
 *                      with box_next as x moves right)
 * @param blob_it       blob iterator (local copy) used by box_next
 * @param region_index  index of the region being built
 * @param pitch         pitch estimate
 * @param pitch_error   tolerance on the pitch
 * @param seg_list      output list receiving the faked points
 */
void make_illegal_segment(
    FPSEGPT_LIST *prev_list,
    TBOX blob_box,
    BLOBNBOX_IT blob_it,
    int16_t region_index,
    int16_t pitch,
    int16_t pitch_error,
    FPSEGPT_LIST *seg_list) {
  int16_t lo_x = 0;                   // lower x limit of this region
  int16_t hi_x = 0;                   // upper x limit of this region
  float least_cost = FLT_MAX;         // cheapest cost seen in prev_list
  FPSEGPT_IT out_it = seg_list;       // writes the output list
  FPSEGPT_IT prev_it = prev_list;     // reads the previous points

  // Locate the x span covered by the least-cost previous points.
  prev_it.mark_cycle_pt();
  while (!prev_it.cycled_list()) {
    FPSEGPT *prev = prev_it.data();
    if (prev->cost_function() < least_cost) {
      least_cost = prev->cost_function();
      lo_x = prev->position();
      hi_x = lo_x;
    } else if (prev->cost_function() == least_cost) {
      hi_x = prev->position();
    }
    prev_it.forward();
  }

  // Step one pitch to the right, widened by the tolerance.
  lo_x += pitch - pitch_error;
  hi_x += pitch + pitch_error;

  for (int16_t x = lo_x; x <= hi_x; x++) {
    // Move on to the blob box whose right edge is not left of x.
    while (x > blob_box.right()) {
      blob_box = box_next(&blob_it);
    }

    // Distance from x to the nearer vertical edge of the box.
    int16_t offset = x - blob_box.left();
    if (blob_box.right() - x < offset) {
      offset = blob_box.right() - x;
    }

    FPSEGPT *candidate = new FPSEGPT(x, FALSE, offset, region_index,
                                     pitch, pitch_error, prev_list);
    if (candidate->previous() == nullptr) {
      delete candidate;               // could not be linked: discard
      continue;
    }

    ASSERT_HOST(offset >= 0);
    fprintf(stderr, "made fake at %d\n", x);
    out_it.add_after_then_move(candidate);
    candidate->faked = TRUE;
    candidate->fake_count++;
  }
}
コード例 #3
0
ファイル: drawtord.cpp プロジェクト: mehulsbhatt/MyOCRTEST
/**
 * Draw one vertical line per cell boundary across the height of a row.
 *
 * The vertical extent comes from a box accumulated over the row's blob
 * list (current blob's bounding box plus every box returned by box_next()
 * until the iterator has cycled). Each element of cells contributes a line
 * at its x coordinate shifted by xshift.
 *
 * @param win     window to draw in
 * @param colour  pen colour for all lines
 * @param row     row whose blobs give the vertical extent of the lines
 * @param xshift  amount added to every cell x coordinate
 * @param cells   cell boundaries to draw
 */
void plot_row_cells(
        ScrollView *win,
        ScrollView::Color colour,
        TO_ROW *row,
        float xshift,
        ICOORDELT_LIST *cells
) {
    ICOORDELT_IT boundary_it = cells;
    BLOBNBOX_IT row_blobs = row->blob_list();

    // Grow the extent box over the blobs of the row.
    TBOX extent = row_blobs.data()->bounding_box();
    row_blobs.mark_cycle_pt();
    while (!row_blobs.cycled_list()) {
        extent += box_next(&row_blobs);
    }

    win->Pen(colour);
    boundary_it.mark_cycle_pt();
    while (!boundary_it.cycled_list()) {
        ICOORDELT *boundary = boundary_it.data();
        win->Line(boundary->x() + xshift, extent.bottom(),
                  boundary->x() + xshift, extent.top());
        boundary_it.forward();
    }
}
コード例 #4
0
ファイル: drawtord.cpp プロジェクト: mehulsbhatt/MyOCRTEST
/**
 * Compute a fixed-pitch segmentation for a run of blobs and draw it.
 *
 * The segmentation is produced by check_pitch_sync2() when
 * pitsync_linear_version is set, otherwise by check_pitch_sync(); both are
 * called with 2 as their fourth argument. The resulting cut points are
 * drawn as vertical lines spanning a box accumulated from the current blob
 * and blob_count calls to box_next().
 *
 * Points flagged as faked are drawn in ScrollView::WHITE; all others are
 * drawn in the caller-supplied colour.
 *
 * Note: blob_it is advanced by this function (box_next is applied to the
 * caller's iterator).
 *
 * @param win               window to draw in
 * @param colour            pen colour for non-faked cut points
 * @param blob_it           iterator positioned at the first blob
 * @param pitch             pitch of the block
 * @param blob_count        number of real blobs
 * @param projection        vertical projection
 * @param projection_left   left edge, forwarded to check_pitch_sync2
 * @param projection_right  right edge, forwarded to check_pitch_sync2
 * @param projection_scale  scale factor, forwarded to check_pitch_sync2
 */
void plot_fp_cells(
        ScrollView *win,
        ScrollView::Color colour,
        BLOBNBOX_IT *blob_it,
        inT16 pitch,
        inT16 blob_count,
        STATS *projection,
        inT16 projection_left,
        inT16 projection_right,
        float projection_scale) {
    inT16 occupation;              // occupied cells (written by check_pitch_sync2)
    FPSEGPT_LIST seg_list;         // list of cuts
    FPSEGPT_IT seg_it;

    if (pitsync_linear_version)
        check_pitch_sync2(blob_it, blob_count, pitch, 2, projection,
                          projection_left, projection_right,
                          projection_scale, occupation, &seg_list, 0, 0);
    else
        check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list);

    // Accumulate the box that spans the blobs of the word.
    TBOX word_box = blob_it->data()->bounding_box();
    for (; blob_count > 0; blob_count--)
        word_box += box_next(blob_it);

    seg_it.set_to_list(&seg_list);
    for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
        FPSEGPT *segpt = seg_it.data();
        // Select the pen per segment. The colour parameter must not be
        // overwritten here: doing so would make every segment after the
        // first faked one white as well.
        win->Pen(segpt->faked ? ScrollView::WHITE : colour);
        win->Line(segpt->position(), word_box.bottom(), segpt->position(), word_box.top());
    }
}
コード例 #5
0
ファイル: pitsync1.cpp プロジェクト: CDOcr/tesseract
/**
 * Find a fixed-pitch segmentation of a run of blobs.
 *
 * A "lattice" (list of lists) of candidate segmentation points is built,
 * one list per region. Each candidate FPSEGPT is constructed against the
 * previous region's list and is kept only if previous() is non-null. The
 * cheapest terminal candidate (by cost_function()) is remembered as
 * best_end, and the chain reached through previous() from it is moved
 * out of the lattice into seg_list.
 *
 * pitch is raised to at least 3 and pitch_error is limited to
 * (pitch - 3) / 2 before use.
 *
 * @param blob_it      iterator at the first blob; it is copied, the
 *                     caller's iterator is not advanced here
 * @param blob_count   number of blobs in the word
 * @param pitch        pitch estimate
 * @param pitch_error  tolerance on the pitch
 * @param projection   vertical projection; pile_count(x) is used as the
 *                     offset of points that fall inside a blob
 * @param seg_list     output list receiving the chosen segmentation points
 * @return squares() of the last output point minus
 *         sum() * sum() / best_region_index
 */
double check_pitch_sync(                        //find segmentation
                        BLOBNBOX_IT *blob_it,   //blobs to do
                        int16_t blob_count,     //no of blobs
                        int16_t pitch,          //pitch estimate
                        int16_t pitch_error,    //tolerance
                        STATS *projection,      //vertical
                        FPSEGPT_LIST *seg_list  //output list
                       ) {
  int16_t x;                     //current coord
  int16_t min_index;             //blob number
  int16_t max_index;             //blob number
  int16_t left_edge;             //of word
  int16_t right_edge;            //of word
  int16_t right_max;             //max allowed x
  int16_t min_x;                 //in this region
  int16_t max_x;
  int16_t region_index;
  int16_t best_region_index = 0; //for best result
  int16_t offset;                //dist to legal area
  int16_t left_best_x;           //edge of good region
  int16_t right_best_x;          //right edge
  TBOX min_box;                  //bounding box
  TBOX max_box;                  //bounding box
  TBOX next_box;                 //box of next blob
  FPSEGPT *segpt;                //segment point
  FPSEGPT_LIST *segpts;          //points in a segment
  double best_cost;              //best path
  double mean_sum;               //computes result
  FPSEGPT *best_end;             //end of best path
  BLOBNBOX_IT min_it;            //copy iterator
  BLOBNBOX_IT max_it;            //copy iterator
  FPSEGPT_IT segpt_it;           //iterator
                                 //output segments
  FPSEGPT_IT outseg_it = seg_list;
  FPSEGPT_LIST_CLIST lattice;    //list of lists
                                 //region iterator
  FPSEGPT_LIST_C_IT lattice_it = &lattice;

  //      tprintf("Computing sync on word of %d blobs with pitch %d\n",
  //              blob_count, pitch);
  //      if (blob_count==8 && pitch==27)
  //              projection->print(stdout,TRUE);
  // Clamp the inputs: pitch at least 3, pitch_error at most (pitch - 3) / 2.
  if (pitch < 3)
    pitch = 3;                   //nothing ludicrous
  if ((pitch - 3) / 2 < pitch_error)
    pitch_error = (pitch - 3) / 2;
  min_it = *blob_it;
  min_box = box_next (&min_it);  //get box
  //      if (blob_count==8 && pitch==27)
  //              tprintf("1st box at (%d,%d)->(%d,%d)\n",
  //                      min_box.left(),min_box.bottom(),
  //                      min_box.right(),min_box.top());
                                 //left of word
  left_edge = min_box.left () + pitch_error;
  // Step through the remaining blobs to reach the last box of the word.
  for (min_index = 1; min_index < blob_count; min_index++) {
    min_box = box_next (&min_it);
    //              if (blob_count==8 && pitch==27)
    //                      tprintf("Box at (%d,%d)->(%d,%d)\n",
    //                              min_box.left(),min_box.bottom(),
    //                              min_box.right(),min_box.top());
  }
  right_edge = min_box.right (); //end of word
  max_x = left_edge;
                                 //min permissible
  min_x = max_x - pitch + pitch_error * 2 + 1;
  right_max = right_edge + pitch - pitch_error - 1;
  // Region 0: one unconstrained starting point for every x in [min_x, max_x].
  segpts = new FPSEGPT_LIST;     //list of points
  segpt_it.set_to_list (segpts);
  for (x = min_x; x <= max_x; x++) {
    segpt = new FPSEGPT (x);     //make a new one
                                 //put in list
    segpt_it.add_after_then_move (segpt);
  }
                                 //first segment
  lattice_it.add_before_then_move (segpts);
  min_index = 0;
  region_index = 1;
  best_cost = FLT_MAX;
  best_end = nullptr;
  min_it = *blob_it;
  min_box = box_next (&min_it);  //first box
  // Main loop: build one list of candidate points per region, each region
  // being one pitch (+/- pitch_error) further right than the previous one.
  do {
    left_best_x = -1;
    right_best_x = -1;
    segpts = new FPSEGPT_LIST;   //list of points
    segpt_it.set_to_list (segpts);
    min_x += pitch - pitch_error;//next limits
    max_x += pitch + pitch_error;
    // Advance min_box until its right edge is not left of min_x.
    while (min_box.right () < min_x && min_index < blob_count) {
      min_index++;
      min_box = box_next (&min_it);
    }
    max_it = min_it;
    max_index = min_index;
    max_box = min_box;
    next_box = box_next (&max_it);
    for (x = min_x; x <= max_x && x <= right_max; x++) {
      // Keep max_box as the box containing or following x, with next_box
      // one blob ahead of it.
      while (x < right_edge && max_index < blob_count
      && x > max_box.right ()) {
        max_index++;
        max_box = next_box;
        next_box = box_next (&max_it);
      }
      // First branch: x is near a box edge, past the word, overlapping the
      // next box, or far from both edges of a wide box. The point is built
      // with FALSE as its second argument and a small edge-distance offset.
      if (x <= max_box.left () + pitch_error
        || x >= max_box.right () - pitch_error || x >= right_edge
        || (max_index < blob_count - 1 && x >= next_box.left ())
        || (x - max_box.left () > pitch * pitsync_joined_edge
      && max_box.right () - x > pitch * pitsync_joined_edge)) {
      //                      || projection->local_min(x))
        if (x - max_box.left () > 0
          && x - max_box.left () <= pitch_error)
                                 //dist to real break
          offset = x - max_box.left ();
        else if (max_box.right () - x > 0
          && max_box.right () - x <= pitch_error
          && (max_index >= blob_count - 1
          || x < next_box.left ()))
          offset = max_box.right () - x;
        else
          offset = 0;
        //                              offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
        segpt = new FPSEGPT (x, FALSE, offset, region_index,
          pitch, pitch_error, lattice_it.data ());
      }
      // Otherwise the point is built with TRUE as its second argument and
      // the projection pile count at x as its offset.
      else {
        offset = projection->pile_count (x);
        segpt = new FPSEGPT (x, TRUE, offset, region_index,
          pitch, pitch_error, lattice_it.data ());
      }
      // Keep the point only if it linked to a point of the previous region.
      if (segpt->previous () != nullptr) {
        segpt_it.add_after_then_move (segpt);
        if (x >= right_edge - pitch_error) {
          segpt->terminal = TRUE;//no more wanted
          if (segpt->cost_function () < best_cost) {
            best_cost = segpt->cost_function ();
            //find least
            best_end = segpt;
            best_region_index = region_index;
            left_best_x = x;
            right_best_x = x;
          }
          // Extend the run of equal-cost best points only while contiguous.
          else if (segpt->cost_function () == best_cost
            && right_best_x == x - 1)
            right_best_x = x;
        }
      }
      else {
        delete segpt;            //no good
      }
    }
    if (segpts->empty ()) {
      if (best_end != nullptr)
        break;                   //already found one
      // No legal point in this region and no end found yet: fabricate points.
      make_illegal_segment (lattice_it.data (), min_box, min_it,
        region_index, pitch, pitch_error, segpts);
    }
    else {
      // Several adjacent x share the best cost: prefer the middle one.
      if (right_best_x > left_best_x + 1) {
        left_best_x = (left_best_x + right_best_x + 1) / 2;
        for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
          && segpt_it.data ()->position () != left_best_x;
          segpt_it.forward ());
        if (segpt_it.data ()->position () == left_best_x)
                                 //middle of region
          best_end = segpt_it.data ();
      }
    }
                                 //new segment
    lattice_it.add_before_then_move (segpts);
    region_index++;
  }
  while (min_x < right_edge);
  ASSERT_HOST (best_end != nullptr);//must always find some

  // Trace back: in each region list find the current best_end, move it to
  // the output list and continue with the point it links to via previous().
  for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list ();
  lattice_it.forward ()) {
    segpts = lattice_it.data ();
    segpt_it.set_to_list (segpts);
    //              if (blob_count==8 && pitch==27)
    //              {
    //                      for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
    //                      {
    //                              segpt=segpt_it.data();
    //                              tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n",
    //                                      segpt->position(),segpt,segpt->cost_function(),
    //                                      segpt->sum(),segpt->squares(),segpt->previous());
    //                      }
    //                      tprintf("\n");
    //              }
    for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
      && segpt_it.data () != best_end; segpt_it.forward ());
    if (segpt_it.data () == best_end) {
                                 //save good one
      segpt = segpt_it.extract ();
      outseg_it.add_before_then_move (segpt);
      best_end = segpt->previous ();
    }
  }
  // The chain must have been followed to its start and produced output.
  ASSERT_HOST (best_end == nullptr);
  ASSERT_HOST (!outseg_it.empty ());
  outseg_it.move_to_last ();
  // Result: squares() - sum()^2 / best_region_index of the last output point.
  mean_sum = outseg_it.data ()->sum ();
  mean_sum = mean_sum * mean_sum / best_region_index;
  if (outseg_it.data ()->squares () - mean_sum < 0)
    tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
      outseg_it.data ()->squares (), outseg_it.data ()->sum (),
      best_region_index);
  lattice.deep_clear ();         //shift the lot
  return outseg_it.data ()->squares () - mean_sum;
}
コード例 #6
0
ファイル: gap_map.cpp プロジェクト: chrox/node-dv
/**
 * Build the gap map of a block.
 *
 * Pass 1 scans every non-empty row to find the leftmost blob start
 * (min_left), the rightmost end of a last blob (max_right), the number of
 * non-empty rows (total_rows) and the rounded x-heights.
 *
 * If there are fewer than 3 non-empty rows, or min_left >= max_right, no
 * map is built: map stays NULL, total_rows/min_left/max_right are zeroed
 * and the constructor returns.
 *
 * Otherwise the horizontal range is divided into buckets of half the
 * median rounded x-height (at least 1), and pass 2 increments, for each
 * row, every bucket covered by a gap wider than
 * gapmap_big_gaps * row->xheight (and wider than 2). Leading and trailing
 * gaps are included only when gapmap_use_ends is set.
 *
 * Finally a bucket counted in more than total_rows / 2 rows sets any_tabs,
 * unless gapmap_no_isolated_quanta is set and neither neighbouring bucket
 * exceeds the same threshold, in which case the bucket is cleared.
 *
 * @param block  block whose rows are analysed
 */
GAPMAP::GAPMAP(                 //Constructor
               TO_BLOCK *block  //block
              ) {
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  BLOBNBOX_IT blob_it;           //iterator
  TBOX blob_box;
  TBOX prev_blob_box;
  inT16 gap_width;
  inT16 start_of_row;
  inT16 end_of_row;
  STATS xht_stats (0, 128);
  inT16 min_quantum;
  inT16 max_quantum;
  inT16 i;

  row_it.set_to_list (block->get_rows ());
  /*
    Find left and right extremes and bucket size
  */
  map = NULL;
  min_left = MAX_INT16;
  max_right = -MAX_INT16;
  total_rows = 0;
  any_tabs = FALSE;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      total_rows++;
      xht_stats.add ((inT16) floor (row->xheight + 0.5), 1);
      blob_it.set_to_list (row->blob_list ());
      start_of_row = blob_it.data ()->bounding_box ().left ();
      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
      if (min_left > start_of_row)
        min_left = start_of_row;
      if (max_right < end_of_row)
        max_right = end_of_row;
    }
  }
  if ((total_rows < 3) || (min_left >= max_right)) {
    total_rows = 0;
    min_left = max_right = 0;
    return;
  }
  bucket_size = (inT16) floor (xht_stats.median () + 0.5) / 2;
  // A rounded median x-height of 0 or 1 halves to 0, which would be used
  // as a divisor below. Fall back to the smallest usable bucket.
  if (bucket_size < 1)
    bucket_size = 1;
  map_max = (max_right - min_left) / bucket_size;
  map = (inT16 *) alloc_mem ((map_max + 1) * sizeof (inT16));
  for (i = 0; i <= map_max; i++)
    map[i] = 0;

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      blob_it.set_to_list (row->blob_list ());
      blob_it.mark_cycle_pt ();
      blob_box = box_next (&blob_it);
      prev_blob_box = blob_box;
      if (gapmap_use_ends) {
        /* Leading space */
        gap_width = blob_box.left () - min_left;
        if ((gap_width > gapmap_big_gaps * row->xheight)
        && gap_width > 2) {
          max_quantum = (blob_box.left () - min_left) / bucket_size;
          for (i = 0; i <= max_quantum; i++)
            map[i]++;
        }
      }
      while (!blob_it.cycled_list ()) {
        blob_box = box_next (&blob_it);
        gap_width = blob_box.left () - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
        && gap_width > 2) {
          min_quantum =
            (prev_blob_box.right () - min_left) / bucket_size;
          max_quantum = (blob_box.left () - min_left) / bucket_size;
          for (i = min_quantum; i <= max_quantum; i++)
            map[i]++;
        }
        prev_blob_box = blob_box;
      }
      if (gapmap_use_ends) {
        /* Trailing space */
        gap_width = max_right - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
        && gap_width > 2) {
          min_quantum =
            (prev_blob_box.right () - min_left) / bucket_size;
          for (i = min_quantum; i <= map_max; i++)
            map[i]++;
        }
      }
    }
  }
  for (i = 0; i <= map_max; i++) {
    if (map[i] > total_rows / 2) {
      // Only read a neighbour that exists. A neighbour beyond either end
      // of the map counts as "not a gap", so the first and last buckets
      // are judged on their single neighbour, and a one-bucket map
      // (map_max == 0) is treated as isolated instead of reading outside
      // the allocation.
      bool left_is_gap = (i > 0) && (map[i - 1] > total_rows / 2);
      bool right_is_gap = (i < map_max) && (map[i + 1] > total_rows / 2);
      if (gapmap_no_isolated_quanta && !left_is_gap && !right_is_gap) {
        map[i] = 0;              //prevent isolated quantum
      }
      else
        any_tabs = TRUE;
    }
  }
  if (gapmap_debug && any_tabs)
    tprintf ("Table found\n");
}