Пример #1
0
void read_and_textord(                       //read .pb file
                      const char *filename,  //.pb file
                      BLOCK_LIST *blocks) {
  int c;                         //input character
  FILE *infp;                    //input file
  BLOCK *block;                  //current block
  BOX page_box;                  //bounding_box
  BLOCK_IT block_it = blocks;    //iterator
                                 //different orientations
  TO_BLOCK_LIST land_blocks, port_blocks;

  infp = fopen (filename, "r");
  if (infp == NULL)
    CANTOPENFILE.error ("read_and_textord", EXIT, filename);

  while (((c = fgetc (infp)) != EOF) && (ungetc (c, infp) != EOF)) {
                                 //get one
    block = BLOCK::de_serialise (infp);
                                 //add to list
    block_it.add_after_then_move (block);
                                 //find page size
    page_box += block->bounding_box ();
  }
  fclose(infp);

  assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
  filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
  filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
  textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
}
Пример #2
0
void find_components(
                       BLOCK_LIST *blocks,
                       TO_BLOCK_LIST *land_blocks,
                       TO_BLOCK_LIST *port_blocks,
                       TBOX *page_box) {
  BLOCK *block;                  //current block
  PDBLK_CLIST pd_blocks;         //copy of list
  BLOCK_IT block_it = blocks;    //iterator
  PDBLK_C_IT pd_it = &pd_blocks; //iterator
  IMAGE thresh_image;            //thresholded

  int width = page_image.get_xsize();
  int height = page_image.get_ysize();
  if (width > MAX_INT16 || height > MAX_INT16) {
    tprintf("Input image too large! (%d, %d)\n", width, height);
    return;  // Can't handle it.
  }

  ICOORD page_tr(width, height);
  block_it.set_to_list (blocks);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;

  set_global_loc_code(LOC_EDGE_PROG);
  if (!page_image.white_high ())
    invert_image(&page_image);

#ifndef EMBEDDED
  previous_cpu = clock ();
#endif

  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward()) {
    block = block_it.data();
    if (block->poly_block() == NULL ||
        block->poly_block()->IsText()) {
#ifndef GRAPHICS_DISABLED
      extract_edges(NULL, &page_image, &page_image, page_tr, block);
#else
      extract_edges(&page_image, &page_image, page_tr, block);
#endif
      *page_box += block->bounding_box ();
    }
  }
  if (global_monitor != NULL) {
    global_monitor->ocr_alive = TRUE;
    global_monitor->progress = 10;
  }

  assign_blobs_to_blocks2(blocks, land_blocks, port_blocks);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;
  filter_blobs (page_box->topright (), land_blocks, textord_test_landscape);
#ifndef EMBEDDED
  previous_cpu = clock ();
#endif
  filter_blobs (page_box->topright (), port_blocks, !textord_test_landscape);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;
}
Пример #3
0
void Textord::find_components(Pix* pix, BLOCK_LIST *blocks,
                              TO_BLOCK_LIST *to_blocks) {
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  if (width > INT16_MAX || height > INT16_MAX) {
    tprintf("Input image too large! (%d, %d)\n", width, height);
    return;  // Can't handle it.
  }

  set_global_loc_code(LOC_EDGE_PROG);

  BLOCK_IT block_it(blocks);    // iterator
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward()) {
    BLOCK* block = block_it.data();
    if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) {
      extract_edges(pix, block);
    }
  }

  assign_blobs_to_blocks2(pix, blocks, to_blocks);
  ICOORD page_tr(width, height);
  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
}
Пример #4
0
void edges_and_textord(                       //read .pb file
                       const char *filename,  //.pb file
                       BLOCK_LIST *blocks) {
  BLOCK *block;                  //current block
  char *lastdot;                 //of name
  STRING name = filename;        //truncated name
  ICOORD page_tr;
  BOX page_box;                  //bounding_box
  PDBLK_CLIST pd_blocks;         //copy of list
  BLOCK_IT block_it = blocks;    //iterator
  PDBLK_C_IT pd_it = &pd_blocks; //iterator
                                 //different orientations
  TO_BLOCK_LIST land_blocks, port_blocks;
  IMAGE thresh_image;            //thresholded

  lastdot = strrchr (name.string (), '.');
  if (lastdot != NULL)
    *lastdot = '\0';
  if (page_image.get_bpp () == 0) {
    name += tessedit_image_ext;
    if (page_image.read_header (name.string ()))
      CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ());
    if (page_image.read (0))
      READFAILED.error ("edges_and_textord", EXIT, name.string ());
    name = filename;
    lastdot = strrchr (name.string (), '.');
    if (lastdot != NULL)
      *lastdot = '\0';
  }
  page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ());
  read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (),
    blocks);
  block_it.set_to_list (blocks);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;

  if (page_image.get_bpp () > 1) {
    set_global_loc_code(LOC_ADAPTIVE);
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
      pd_it.add_after_then_move (block);
    }
    //              adaptive_threshold(&page_image,&pd_blocks,&thresh_image);
    set_global_loc_code(LOC_EDGE_PROG);
#ifndef EMBEDDED
    previous_cpu = clock ();
#endif
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
      if (!polygon_tess_approximation)
        invert_image(&page_image);
#ifndef GRAPHICS_DISABLED
      extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block);
#else
      extract_edges(&page_image, &thresh_image, page_tr, block);
#endif
      page_box += block->bounding_box ();
    }
    page_image = thresh_image;   //everyone else gets it
  }
  else {
    set_global_loc_code(LOC_EDGE_PROG);
    if (!page_image.white_high ())
      invert_image(&page_image);

#ifndef EMBEDDED
    previous_cpu = clock ();
#endif

    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
#ifndef GRAPHICS_DISABLED
      extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block);
#else
      extract_edges(&page_image, &page_image, page_tr, block);
#endif
      page_box += block->bounding_box ();
    }
  }
  if (global_monitor != NULL) {
    global_monitor->ocr_alive = TRUE;
    global_monitor->progress = 10;
  }

  assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;
  filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);
#ifndef EMBEDDED
  previous_cpu = clock ();
#endif
  filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);
  if (global_monitor != NULL)
    global_monitor->ocr_alive = TRUE;
  textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);
}