void textord_page( //make rows & words ICOORD page_tr, //top right BLOCK_LIST *blocks, //block list TO_BLOCK_LIST *land_blocks, //rotated for landscape TO_BLOCK_LIST *port_blocks //output list ) { float gradient; //global skew set_global_loc_code(LOC_TEXT_ORD_ROWS); gradient = make_rows (page_tr, blocks, land_blocks, port_blocks); if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 20; } set_global_loc_code(LOC_TEXT_ORD_WORDS); make_words(page_tr, gradient, blocks, land_blocks, port_blocks); if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 30; } cleanup_blocks(blocks); //remove empties #ifndef GRAPHICS_DISABLED close_to_win(); #endif if (textord_exit_after && !interactive_mode) exit (0); }
void find_components( BLOCK_LIST *blocks, TO_BLOCK_LIST *land_blocks, TO_BLOCK_LIST *port_blocks, TBOX *page_box) { BLOCK *block; //current block PDBLK_CLIST pd_blocks; //copy of list BLOCK_IT block_it = blocks; //iterator PDBLK_C_IT pd_it = &pd_blocks; //iterator IMAGE thresh_image; //thresholded int width = page_image.get_xsize(); int height = page_image.get_ysize(); if (width > MAX_INT16 || height > MAX_INT16) { tprintf("Input image too large! (%d, %d)\n", width, height); return; // Can't handle it. } ICOORD page_tr(width, height); block_it.set_to_list (blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; set_global_loc_code(LOC_EDGE_PROG); if (!page_image.white_high ()) invert_image(&page_image); #ifndef EMBEDDED previous_cpu = clock (); #endif for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { block = block_it.data(); if (block->poly_block() == NULL || block->poly_block()->IsText()) { #ifndef GRAPHICS_DISABLED extract_edges(NULL, &page_image, &page_image, page_tr, block); #else extract_edges(&page_image, &page_image, page_tr, block); #endif *page_box += block->bounding_box (); } } if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 10; } assign_blobs_to_blocks2(blocks, land_blocks, port_blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; filter_blobs (page_box->topright (), land_blocks, textord_test_landscape); #ifndef EMBEDDED previous_cpu = clock (); #endif filter_blobs (page_box->topright (), port_blocks, !textord_test_landscape); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; }
/**
 * Textord::find_components
 *
 * Extracts edges from pix for each block that either has no poly_block or
 * whose poly_block reports IsText(), then calls assign_blobs_to_blocks2 and
 * filters to_blocks using the image size as the page top-right corner.
 *
 * Prints a message and returns without doing any work when either image
 * dimension is larger than INT16_MAX.
 */
void Textord::find_components(Pix* pix, BLOCK_LIST *blocks,
                              TO_BLOCK_LIST *to_blocks) {
  const int img_width = pixGetWidth(pix);
  const int img_height = pixGetHeight(pix);
  const bool too_large = img_width > INT16_MAX || img_height > INT16_MAX;
  if (too_large) {
    tprintf("Input image too large! (%d, %d)\n", img_width, img_height);
    return;  // Can't handle it.
  }

  set_global_loc_code(LOC_EDGE_PROG);

  BLOCK_IT it(blocks);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOCK* current = it.data();
    auto* poly = current->pdblk.poly_block();
    // Skip blocks that carry a polygon which is not text.
    if (poly != nullptr && !poly->IsText()) {
      continue;
    }
    extract_edges(pix, current);
  }

  assign_blobs_to_blocks2(pix, blocks, to_blocks);

  // The page's top-right corner is simply (width, height) of the image.
  ICOORD page_tr(img_width, img_height);
  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
}
void edges_and_textord( //read .pb file const char *filename, //.pb file BLOCK_LIST *blocks) { BLOCK *block; //current block char *lastdot; //of name STRING name = filename; //truncated name ICOORD page_tr; BOX page_box; //bounding_box PDBLK_CLIST pd_blocks; //copy of list BLOCK_IT block_it = blocks; //iterator PDBLK_C_IT pd_it = &pd_blocks; //iterator //different orientations TO_BLOCK_LIST land_blocks, port_blocks; IMAGE thresh_image; //thresholded lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) *lastdot = '\0'; if (page_image.get_bpp () == 0) { name += tessedit_image_ext; if (page_image.read_header (name.string ())) CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ()); if (page_image.read (0)) READFAILED.error ("edges_and_textord", EXIT, name.string ()); name = filename; lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) *lastdot = '\0'; } page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ()); read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (), blocks); block_it.set_to_list (blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; if (page_image.get_bpp () > 1) { set_global_loc_code(LOC_ADAPTIVE); for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); pd_it.add_after_then_move (block); } // adaptive_threshold(&page_image,&pd_blocks,&thresh_image); set_global_loc_code(LOC_EDGE_PROG); #ifndef EMBEDDED previous_cpu = clock (); #endif for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); if (!polygon_tess_approximation) invert_image(&page_image); #ifndef GRAPHICS_DISABLED extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block); #else extract_edges(&page_image, &thresh_image, page_tr, block); #endif page_box += block->bounding_box (); } page_image = thresh_image; //everyone else gets it } else { set_global_loc_code(LOC_EDGE_PROG); if (!page_image.white_high ()) 
invert_image(&page_image); #ifndef EMBEDDED previous_cpu = clock (); #endif for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); #ifndef GRAPHICS_DISABLED extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block); #else extract_edges(&page_image, &page_image, page_tr, block); #endif page_box += block->bounding_box (); } } if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 10; } assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape); #ifndef EMBEDDED previous_cpu = clock (); #endif filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks); }