void plot_fp_cells2( //draw words ScrollView *win, //window tro draw in ScrollView::Color colour, //colour of lines TO_ROW *row, //for location FPSEGPT_LIST *seg_list //segments to plot ) { TBOX word_box; //bounding box FPSEGPT_IT seg_it = seg_list; //blobs in row BLOBNBOX_IT blob_it = row->blob_list(); FPSEGPT *segpt; //current point word_box = blob_it.data()->bounding_box(); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();) word_box += box_next(&blob_it); for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { segpt = seg_it.data(); if (segpt->faked) { colour = ScrollView::WHITE; win->Pen(colour); } else { win->Pen(colour); } win->Line(segpt->position(), word_box.bottom(), segpt->position(), word_box.top()); } }
void make_illegal_segment( //find segmentation FPSEGPT_LIST *prev_list, //previous segments TBOX blob_box, //bounding box BLOBNBOX_IT blob_it, //iterator int16_t region_index, //number of segment int16_t pitch, //pitch estimate int16_t pitch_error, //tolerance FPSEGPT_LIST *seg_list //output list ) { int16_t x; //current coord int16_t min_x = 0; //in this region int16_t max_x = 0; int16_t offset; //dist to edge FPSEGPT *segpt; //segment point FPSEGPT *prevpt; //previous point float best_cost; //best path FPSEGPT_IT segpt_it = seg_list;//iterator //previous points FPSEGPT_IT prevpt_it = prev_list; best_cost = FLT_MAX; for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list (); prevpt_it.forward ()) { prevpt = prevpt_it.data (); if (prevpt->cost_function () < best_cost) { //find least best_cost = prevpt->cost_function (); min_x = prevpt->position (); max_x = min_x; //limits on coords } else if (prevpt->cost_function () == best_cost) { max_x = prevpt->position (); } } min_x += pitch - pitch_error; max_x += pitch + pitch_error; for (x = min_x; x <= max_x; x++) { while (x > blob_box.right ()) { blob_box = box_next (&blob_it); } offset = x - blob_box.left (); if (blob_box.right () - x < offset) offset = blob_box.right () - x; segpt = new FPSEGPT (x, FALSE, offset, region_index, pitch, pitch_error, prev_list); if (segpt->previous () != nullptr) { ASSERT_HOST (offset >= 0); fprintf (stderr, "made fake at %d\n", x); //make one up segpt_it.add_after_then_move (segpt); segpt->faked = TRUE; segpt->fake_count++; } else delete segpt; } }
void plot_row_cells( //draw words ScrollView *win, //window tro draw in ScrollView::Color colour, //colour of lines TO_ROW *row, //for location float xshift, //amount of shift ICOORDELT_LIST *cells //cells to draw ) { TBOX word_box; //bounding box ICOORDELT_IT cell_it = cells; //blobs in row BLOBNBOX_IT blob_it = row->blob_list(); ICOORDELT *cell; //current cell word_box = blob_it.data()->bounding_box(); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();) word_box += box_next(&blob_it); win->Pen(colour); for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) { cell = cell_it.data(); win->Line(cell->x() + xshift, word_box.bottom(), cell->x() + xshift, word_box.top()); } }
void plot_fp_cells( //draw words ScrollView *win, //window tro draw in ScrollView::Color colour, //colour of lines BLOBNBOX_IT *blob_it, //blobs inT16 pitch, //of block inT16 blob_count, //no of real blobs STATS *projection, //vertical inT16 projection_left, //edges //scale factor inT16 projection_right, float projection_scale) { inT16 occupation; //occupied cells TBOX word_box; //bounding box FPSEGPT_LIST seg_list; //list of cuts FPSEGPT_IT seg_it; FPSEGPT *segpt; //current point if (pitsync_linear_version) check_pitch_sync2(blob_it, blob_count, pitch, 2, projection, projection_left, projection_right, projection_scale, occupation, &seg_list, 0, 0); else check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list); word_box = blob_it->data()->bounding_box(); for (; blob_count > 0; blob_count--) word_box += box_next(blob_it); seg_it.set_to_list(&seg_list); for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { segpt = seg_it.data(); if (segpt->faked) { colour = ScrollView::WHITE; win->Pen(colour); } else { win->Pen(colour); } win->Line(segpt->position(), word_box.bottom(), segpt->position(), word_box.top()); } }
double check_pitch_sync( //find segmentation BLOBNBOX_IT *blob_it, //blobs to do int16_t blob_count, //no of blobs int16_t pitch, //pitch estimate int16_t pitch_error, //tolerance STATS *projection, //vertical FPSEGPT_LIST *seg_list //output list ) { int16_t x; //current coord int16_t min_index; //blob number int16_t max_index; //blob number int16_t left_edge; //of word int16_t right_edge; //of word int16_t right_max; //max allowed x int16_t min_x; //in this region int16_t max_x; int16_t region_index; int16_t best_region_index = 0; //for best result int16_t offset; //dist to legal area int16_t left_best_x; //edge of good region int16_t right_best_x; //right edge TBOX min_box; //bounding box TBOX max_box; //bounding box TBOX next_box; //box of next blob FPSEGPT *segpt; //segment point FPSEGPT_LIST *segpts; //points in a segment double best_cost; //best path double mean_sum; //computes result FPSEGPT *best_end; //end of best path BLOBNBOX_IT min_it; //copy iterator BLOBNBOX_IT max_it; //copy iterator FPSEGPT_IT segpt_it; //iterator //output segments FPSEGPT_IT outseg_it = seg_list; FPSEGPT_LIST_CLIST lattice; //list of lists //region iterator FPSEGPT_LIST_C_IT lattice_it = &lattice; // tprintf("Computing sync on word of %d blobs with pitch %d\n", // blob_count, pitch); // if (blob_count==8 && pitch==27) // projection->print(stdout,TRUE); if (pitch < 3) pitch = 3; //nothing ludicrous if ((pitch - 3) / 2 < pitch_error) pitch_error = (pitch - 3) / 2; min_it = *blob_it; min_box = box_next (&min_it); //get box // if (blob_count==8 && pitch==27) // tprintf("1st box at (%d,%d)->(%d,%d)\n", // min_box.left(),min_box.bottom(), // min_box.right(),min_box.top()); //left of word left_edge = min_box.left () + pitch_error; for (min_index = 1; min_index < blob_count; min_index++) { min_box = box_next (&min_it); // if (blob_count==8 && pitch==27) // tprintf("Box at (%d,%d)->(%d,%d)\n", // min_box.left(),min_box.bottom(), // min_box.right(),min_box.top()); } right_edge = 
min_box.right (); //end of word max_x = left_edge; //min permissible min_x = max_x - pitch + pitch_error * 2 + 1; right_max = right_edge + pitch - pitch_error - 1; segpts = new FPSEGPT_LIST; //list of points segpt_it.set_to_list (segpts); for (x = min_x; x <= max_x; x++) { segpt = new FPSEGPT (x); //make a new one //put in list segpt_it.add_after_then_move (segpt); } //first segment lattice_it.add_before_then_move (segpts); min_index = 0; region_index = 1; best_cost = FLT_MAX; best_end = nullptr; min_it = *blob_it; min_box = box_next (&min_it); //first box do { left_best_x = -1; right_best_x = -1; segpts = new FPSEGPT_LIST; //list of points segpt_it.set_to_list (segpts); min_x += pitch - pitch_error;//next limits max_x += pitch + pitch_error; while (min_box.right () < min_x && min_index < blob_count) { min_index++; min_box = box_next (&min_it); } max_it = min_it; max_index = min_index; max_box = min_box; next_box = box_next (&max_it); for (x = min_x; x <= max_x && x <= right_max; x++) { while (x < right_edge && max_index < blob_count && x > max_box.right ()) { max_index++; max_box = next_box; next_box = box_next (&max_it); } if (x <= max_box.left () + pitch_error || x >= max_box.right () - pitch_error || x >= right_edge || (max_index < blob_count - 1 && x >= next_box.left ()) || (x - max_box.left () > pitch * pitsync_joined_edge && max_box.right () - x > pitch * pitsync_joined_edge)) { // || projection->local_min(x)) if (x - max_box.left () > 0 && x - max_box.left () <= pitch_error) //dist to real break offset = x - max_box.left (); else if (max_box.right () - x > 0 && max_box.right () - x <= pitch_error && (max_index >= blob_count - 1 || x < next_box.left ())) offset = max_box.right () - x; else offset = 0; // offset=pitsync_offset_freecut_fraction*projection->pile_count(x); segpt = new FPSEGPT (x, FALSE, offset, region_index, pitch, pitch_error, lattice_it.data ()); } else { offset = projection->pile_count (x); segpt = new FPSEGPT (x, TRUE, offset, region_index, 
pitch, pitch_error, lattice_it.data ()); } if (segpt->previous () != nullptr) { segpt_it.add_after_then_move (segpt); if (x >= right_edge - pitch_error) { segpt->terminal = TRUE;//no more wanted if (segpt->cost_function () < best_cost) { best_cost = segpt->cost_function (); //find least best_end = segpt; best_region_index = region_index; left_best_x = x; right_best_x = x; } else if (segpt->cost_function () == best_cost && right_best_x == x - 1) right_best_x = x; } } else { delete segpt; //no good } } if (segpts->empty ()) { if (best_end != nullptr) break; //already found one make_illegal_segment (lattice_it.data (), min_box, min_it, region_index, pitch, pitch_error, segpts); } else { if (right_best_x > left_best_x + 1) { left_best_x = (left_best_x + right_best_x + 1) / 2; for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () && segpt_it.data ()->position () != left_best_x; segpt_it.forward ()); if (segpt_it.data ()->position () == left_best_x) //middle of region best_end = segpt_it.data (); } } //new segment lattice_it.add_before_then_move (segpts); region_index++; } while (min_x < right_edge); ASSERT_HOST (best_end != nullptr);//must always find some for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list (); lattice_it.forward ()) { segpts = lattice_it.data (); segpt_it.set_to_list (segpts); // if (blob_count==8 && pitch==27) // { // for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward()) // { // segpt=segpt_it.data(); // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n", // segpt->position(),segpt,segpt->cost_function(), // segpt->sum(),segpt->squares(),segpt->previous()); // } // tprintf("\n"); // } for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () && segpt_it.data () != best_end; segpt_it.forward ()); if (segpt_it.data () == best_end) { //save good one segpt = segpt_it.extract (); outseg_it.add_before_then_move (segpt); best_end = segpt->previous (); } } ASSERT_HOST (best_end == nullptr); ASSERT_HOST (!outseg_it.empty ()); 
outseg_it.move_to_last (); mean_sum = outseg_it.data ()->sum (); mean_sum = mean_sum * mean_sum / best_region_index; if (outseg_it.data ()->squares () - mean_sum < 0) tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", outseg_it.data ()->squares (), outseg_it.data ()->sum (), best_region_index); lattice.deep_clear (); //shift the lot return outseg_it.data ()->squares () - mean_sum; }
/**
 * GAPMAP::GAPMAP
 *
 * Build the gap map for a block. The horizontal extent of the block's
 * non-empty rows is divided into buckets of bucket_size pixels (half the
 * rounded median row x-height). For every row, each gap wider than
 * gapmap_big_gaps * xheight (and wider than 2) increments the buckets it
 * touches; leading and trailing space is included when gapmap_use_ends is
 * set. any_tabs is set when some bucket is hit more than total_rows / 2
 * times and, if gapmap_no_isolated_quanta is set, is not isolated.
 *
 * The map is left NULL, with total_rows, min_left and max_right zeroed, when
 * there are fewer than 3 non-empty rows, when the rows have no horizontal
 * extent, or when the bucket size would be zero.
 */
GAPMAP::GAPMAP(                   // Constructor
    TO_BLOCK *block               // block
) {
  TO_ROW_IT row_it;               // row iterator
  TO_ROW *row;                    // current row
  BLOBNBOX_IT blob_it;            // iterator
  TBOX blob_box;
  TBOX prev_blob_box;
  inT16 gap_width;
  inT16 start_of_row;
  inT16 end_of_row;
  STATS xht_stats (0, 128);
  inT16 min_quantum;
  inT16 max_quantum;
  inT16 i;

  row_it.set_to_list (block->get_rows ());
  /*
    Find left and right extremes and bucket size
  */
  map = NULL;
  min_left = MAX_INT16;
  max_right = -MAX_INT16;
  total_rows = 0;
  any_tabs = FALSE;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      total_rows++;
      xht_stats.add ((inT16) floor (row->xheight + 0.5), 1);
      blob_it.set_to_list (row->blob_list ());
      start_of_row = blob_it.data ()->bounding_box ().left ();
      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
      if (min_left > start_of_row)
        min_left = start_of_row;
      if (max_right < end_of_row)
        max_right = end_of_row;
    }
  }
  if ((total_rows < 3) || (min_left >= max_right)) {
    total_rows = 0;
    min_left = max_right = 0;
    return;
  }

  bucket_size = (inT16) floor (xht_stats.median () + 0.5) / 2;
  if (bucket_size < 1) {
    // A rounded median x-height below 2 gives a zero bucket size, and every
    // quantum computation below divides by it. Bail out in the same
    // "no map" state as the early return above.
    total_rows = 0;
    min_left = max_right = 0;
    return;
  }
  map_max = (max_right - min_left) / bucket_size;
  map = (inT16 *) alloc_mem ((map_max + 1) * sizeof (inT16));
  for (i = 0; i <= map_max; i++)
    map[i] = 0;

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      blob_it.set_to_list (row->blob_list ());
      blob_it.mark_cycle_pt ();
      blob_box = box_next (&blob_it);
      prev_blob_box = blob_box;
      if (gapmap_use_ends) {
        /* Leading space */
        gap_width = blob_box.left () - min_left;
        if ((gap_width > gapmap_big_gaps * row->xheight)
            && gap_width > 2) {
          max_quantum = (blob_box.left () - min_left) / bucket_size;
          for (i = 0; i <= max_quantum; i++)
            map[i]++;
        }
      }
      while (!blob_it.cycled_list ()) {
        blob_box = box_next (&blob_it);
        gap_width = blob_box.left () - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
            && gap_width > 2) {
          min_quantum = (prev_blob_box.right () - min_left) / bucket_size;
          max_quantum = (blob_box.left () - min_left) / bucket_size;
          for (i = min_quantum; i <= max_quantum; i++)
            map[i]++;
        }
        prev_blob_box = blob_box;
      }
      if (gapmap_use_ends) {
        /* Trailing space */
        gap_width = max_right - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
            && gap_width > 2) {
          min_quantum = (prev_blob_box.right () - min_left) / bucket_size;
          for (i = min_quantum; i <= map_max; i++)
            map[i]++;
        }
      }
    }
  }

  for (i = 0; i <= map_max; i++) {
    if (map[i] > total_rows / 2) {
      // A neighbour that does not exist (off either end of the map) counts
      // as "low". This gives the same result as testing the first, last and
      // interior quanta separately, without reading outside the map when it
      // holds a single bucket.
      const bool left_low = (i == 0) || (map[i - 1] <= total_rows / 2);
      const bool right_low = (i == map_max) || (map[i + 1] <= total_rows / 2);
      if (gapmap_no_isolated_quanta && left_low && right_low) {
        map[i] = 0;               // prevent isolated quantum
      }
      else
        any_tabs = TRUE;
    }
  }
  if (gapmap_debug && any_tabs)
    tprintf ("Table found\n");
}