/**
 * word_display()  Word Processor
 *
 * Display a word according to its display modes.
 *
 * If a coloration mode other than CM_RAINBOW is active and the word has a
 * box_word, every blob box is drawn in GREEN, or in RED when the attribute
 * selected by color_mode applies to that blob/word, and nothing else is drawn.
 * Otherwise the word is drawn according to its display flags (bounding boxes,
 * edge steps, polygonal approximation, text and blamer information). If none
 * of those drew anything, the word bounding box is plotted anyway.
 *
 * @param block     not used by this function
 * @param row       not used by this function
 * @param word_res  recognition result holding the word to display
 * @return false if coloration was requested but the word has no fontinfo,
 *         true otherwise.
 */
BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
  WERD* word = word_res->word;
  BOOL8 displayed_something = FALSE;

  if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
    BoxWord* box_word = word_res->box_word;
    // best_choice can be NULL (the blamer code further down allows for it),
    // so the position-based modes must check it before dereferencing. A word
    // without a best choice is simply left GREEN.
    WERD_CHOICE* best_choice = word_res->best_choice;
    int length = box_word->length();
    if (word_res->fontinfo == NULL) return false;
    const FontInfo& font_info = *word_res->fontinfo;
    for (int i = 0; i < length; ++i) {
      ScrollView::Color color = ScrollView::GREEN;
      switch (color_mode) {
        case CM_SUBSCRIPT:
          if (best_choice != NULL &&
              best_choice->BlobPosition(i) == SP_SUBSCRIPT)
            color = ScrollView::RED;
          break;
        case CM_SUPERSCRIPT:
          if (best_choice != NULL &&
              best_choice->BlobPosition(i) == SP_SUPERSCRIPT)
            color = ScrollView::RED;
          break;
        case CM_ITALIC:
          if (font_info.is_italic())
            color = ScrollView::RED;
          break;
        case CM_BOLD:
          if (font_info.is_bold())
            color = ScrollView::RED;
          break;
        case CM_FIXEDPITCH:
          if (font_info.is_fixed_pitch())
            color = ScrollView::RED;
          break;
        case CM_SERIF:
          if (font_info.is_serif())
            color = ScrollView::RED;
          break;
        case CM_SMALLCAPS:
          if (word_res->small_caps)
            color = ScrollView::RED;
          break;
        case CM_DROPCAPS:
          if (best_choice != NULL &&
              best_choice->BlobPosition(i) == SP_DROPCAP)
            color = ScrollView::RED;
          break;
          // TODO(rays) underline is currently completely unsupported.
        case CM_UNDERLINE:
        default:
          break;
      }
      image_win->Pen(color);
      TBOX box = box_word->BlobBox(i);
      image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
    }
    return true;
  }
  /*
    Note the double coercions of(COLOUR)((inT32)editor_image_word_bb_color)
    etc. are to keep the compiler happy.
  */
  // display bounding box
  if (word->display_flag(DF_BOX)) {
    word->bounding_box().plot(image_win,
                              (ScrollView::Color)((inT32)
                                  editor_image_word_bb_color),
                              (ScrollView::Color)((inT32)
                                  editor_image_word_bb_color));

    ScrollView::Color c = (ScrollView::Color)
        ((inT32) editor_image_blob_bb_color);
    image_win->Pen(c);
    C_BLOB_IT c_it;  // cblob iterator
    c_it.set_to_list(word->cblob_list());
    for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())
      c_it.data()->bounding_box().plot(image_win);
    displayed_something = TRUE;
  }

  // display edge steps
  if (word->display_flag(DF_EDGE_STEP)) {  // edgesteps available
    word->plot(image_win);                 // rainbow colors
    displayed_something = TRUE;
  }

  // display poly approx
  if (word->display_flag(DF_POLYGONAL)) {
    // need to convert
    TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
    tword->plot(image_win);
    delete tword;
    displayed_something = TRUE;
  }

  // Display correct text and blamer information.
  STRING text;
  STRING blame;
  if (word->display_flag(DF_TEXT) && word->text() != NULL) {
    text = word->text();
  }
  // Blamer output is skipped only when a blamer bundle exists and reports
  // IRR_CORRECT; a missing bundle is displayed as "NULL".
  if (word->display_flag(DF_BLAMER) &&
      !(word_res->blamer_bundle != NULL &&
        word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {
    text = "";
    const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
    if (blamer_bundle == NULL) {
      text += "NULL";
    } else {
      text = blamer_bundle->TruthString();
    }
    text += " -> ";
    STRING best_choice_str;
    if (word_res->best_choice == NULL) {
      best_choice_str = "NULL";
    } else {
      word_res->best_choice->string_and_lengths(&best_choice_str, NULL);
    }
    text += best_choice_str;
    IncorrectResultReason reason = (blamer_bundle == NULL) ?
        IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
    ASSERT_HOST(reason < IRR_NUM_REASONS);
    blame += " [";
    blame += BlamerBundle::IncorrectReasonName(reason);
    blame += "]";
  }
  if (text.length() > 0) {
    TBOX word_bb = word->bounding_box();  // word bounding box
    image_win->Pen(ScrollView::RED);
    int word_height = word_bb.height();   // ht of word BB
    // Text is drawn at half the word height, capped at 20.
    int text_height = 0.50 * word_height;
    if (text_height > 20) text_height = 20;
    image_win->TextAttributes("Arial", text_height, false, false, false);
    // Horizontal offset from the bottom-left corner; only applied when the
    // word is wider than it is tall.
    float shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
    image_win->Text(word_bb.left() + shift,
                    word_bb.bottom() + 0.25 * word_height, text.string());
    if (blame.length() > 0) {
      // The blame reason goes one text line below the word text.
      image_win->Text(word_bb.left() + shift,
                      word_bb.bottom() + 0.25 * word_height - text_height,
                      blame.string());
    }

    displayed_something = TRUE;
  }

  if (!displayed_something)      // display BBox anyway
    word->bounding_box().plot(image_win,
                              (ScrollView::Color)((inT32)
                                  editor_image_word_bb_color),
                              (ScrollView::Color)((inT32)
                                  editor_image_word_bb_color));
  return TRUE;
}
/** * Segment the page according to the current value of tessedit_pageseg_mode. * pix_binary_ is used as the source image and should not be NULL. * On return the blocks list owns all the constructed page layout. */ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr) { ASSERT_HOST(pix_binary_ != NULL); int width = pixGetWidth(pix_binary_); int height = pixGetHeight(pix_binary_); // Get page segmentation mode. PageSegMode pageseg_mode = static_cast<PageSegMode>( static_cast<int>(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); if (lastdot != NULL) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block->set_right_to_left(right_to_left()); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_BLOCK. pageseg_mode = PSM_SINGLE_BLOCK; } int auto_page_seg_ret_val = 0; TO_BLOCK_LIST to_blocks; if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) { auto_page_seg_ret_val = AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr); if (pageseg_mode == PSM_OSD_ONLY) return auto_page_seg_ret_val; // To create blobs from the image region bounds uncomment this line: // to_blocks.clear(); // Uncomment to go back to the old mode. 
} else { deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); if (pageseg_mode == PSM_CIRCLE_WORD) { Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); if (pixcleaned != NULL) { pixDestroy(&pix_binary_); pix_binary_ = pixcleaned; } } } if (auto_page_seg_ret_val < 0) { return -1; } if (blocks->empty()) { if (textord_debug_tabfind) tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } bool splitting = pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT; bool cjk_mode = textord_use_cjk_fp_model; textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_, pix_grey_, splitting || cjk_mode, blocks, &to_blocks); return auto_page_seg_ret_val; }
// Builds the string form of the word choice with string_and_lengths() (the
// per-character lengths are not requested) and runs the callback on the
// resulting C string. Going by the function name, the text is UTF-8.
static void CallWithUTF8(TessCallback1<const char *> *cb,
                         const WERD_CHOICE *wc) {
  STRING choice_text;
  wc->string_and_lengths(&choice_text, nullptr);
  const char *choice_chars = choice_text.string();
  cb->Run(choice_chars);
}
int main(int argc, char **argv) { #ifdef USING_GETTEXT setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); #endif if ((argc == 2 && strcmp(argv[1], "-v") == 0) || (argc == 2 && strcmp(argv[1], "--version") == 0)) { char *versionStrP; fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); versionStrP = getLeptonicaVersion(); fprintf(stderr, " %s\n", versionStrP); lept_free(versionStrP); versionStrP = getImagelibVersions(); fprintf(stderr, " %s\n", versionStrP); lept_free(versionStrP); exit(0); } // Make the order of args a bit more forgiving than it used to be. const char* lang = "eng"; const char* image = NULL; const char* output = NULL; bool noocr = false; bool list_langs = false; bool print_parameters = false; tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; int arg = 1; while (arg < argc && (output == NULL || argv[arg][0] == '-')) { if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) { lang = argv[arg + 1]; ++arg; } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) { pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1])); ++arg; } else if (strcmp(argv[arg], "--print-parameters") == 0) { noocr = true; print_parameters = true; } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { // handled properly after api init ++arg; } else if (image == NULL) { image = argv[arg]; } else if (output == NULL) { output = argv[arg]; } ++arg; } if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) { list_langs = true; noocr = true; } if (output == NULL && noocr == false) { fprintf(stderr, _("Usage:%s imagename outputbase|stdout [-l lang] " "[-psm pagesegmode] [-c configvar=value] " "[configfile...]\n\n"), argv[0]); fprintf(stderr, _("pagesegmode values are:\n" "0 = Orientation and script detection (OSD) only.\n" "1 = Automatic page segmentation with OSD.\n" "2 = Automatic page segmentation, but no OSD, or OCR\n" "3 = Fully automatic page segmentation, but no OSD. 
(Default)\n" "4 = Assume a single column of text of variable sizes.\n" "5 = Assume a single uniform block of vertically aligned text.\n" "6 = Assume a single uniform block of text.\n" "7 = Treat the image as a single text line.\n" "8 = Treat the image as a single word.\n" "9 = Treat the image as a single word in a circle.\n" "10 = Treat the image as a single character.\n")); fprintf(stderr, _("multiple -c arguments are allowed.\n")); fprintf(stderr, _("-l lang, -psm pagesegmode and any -c options must occur" "before any configfile.\n\n")); fprintf(stderr, _("Single options:\n")); fprintf(stderr, _(" -v --version: version info\n")); fprintf(stderr, _(" --list-langs: list available languages for tesseract " "engine\n")); fprintf(stderr, _(" --print-parameters: print tesseract parameters to the " "stdout\n")); exit(1); } tesseract::TessBaseAPI api; STRING tessdata_dir; truncate_path(argv[0], &tessdata_dir); api.SetOutputName(output); int rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT, &(argv[arg]), argc - arg, NULL, NULL, false); if (rc) { fprintf(stderr, _("Could not initialize tesseract.\n")); exit(1); } char opt1[255], opt2[255]; for (arg = 0; arg < argc; arg++) { if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { strncpy(opt1, argv[arg + 1], 255); *(strchr(opt1, '=')) = 0; strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255); opt2[254] = 0; ++arg; if(!api.SetVariable(opt1, opt2)) { fprintf(stderr, _("Could not set option: %s=%s\n"), opt1, opt2); } } } if (list_langs) { GenericVector<STRING> languages; api.GetAvailableLanguagesAsVector(&languages); fprintf(stderr, _("List of available languages (%d):\n"), languages.size()); for (int index = 0; index < languages.size(); ++index) { STRING& string = languages[index]; fprintf(stderr, "%s\n", string.string()); } api.End(); exit(0); } if (print_parameters) { FILE* fout = stdout; fprintf(stdout, _("Tesseract parameters:\n")); api.PrintVariables(fout); api.End(); exit(0); } // We have 2 possible sources 
of pagesegmode: a config file and // the command line. For backwards compatability reasons, the // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the // default for this program is tesseract::PSM_AUTO. We will let // the config file take priority, so the command-line default // can take priority over the tesseract default, so we use the // value from the command line only if the retrieved mode // is still tesseract::PSM_SINGLE_BLOCK, indicating no change // in any config file. Therefore the only way to force // tesseract::PSM_SINGLE_BLOCK is from the command line. // It would be simpler if we could set the value before Init, // but that doesn't work. if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) api.SetPageSegMode(pagesegmode); tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", tesseract::TessBaseAPI::Version()); FILE* fin = fopen(image, "rb"); if (fin == NULL) { fprintf(stderr, _("Cannot open input file: %s\n"), image); exit(2); } fclose(fin); PIX *pixs; if ((pixs = pixRead(image)) == NULL) { fprintf(stderr, _("Unsupported image type.\n")); exit(3); } pixDestroy(&pixs); bool output_hocr = false; api.GetBoolVariable("tessedit_create_hocr", &output_hocr); bool output_box = false; api.GetBoolVariable("tessedit_create_boxfile", &output_box); FILE* fout = stdout; if (strcmp(output, "-") && strcmp(output, "stdout")) { STRING outfile = output; outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt"; fout = fopen(outfile.string(), "wb"); if (fout == NULL) { fprintf(stderr, _("Cannot create output file %s\n"), outfile.string()); exit(1); } } STRING text_out; if (!api.ProcessPages(image, NULL, 0, &text_out)) { fprintf(stderr, _("Error during processing.\n")); if (fout != stdout) fclose(fout); exit(1); } fwrite(text_out.string(), 1, text_out.length(), fout); if (fout != stdout) fclose(fout); else clearerr(fout); return 0; // Normal exit }
// Segment the page according to the current value of tessedit_pageseg_mode. // If the pix_binary_ member is not NULL, it is used as the source image, // and copied to image, otherwise it just uses image as the input. // On return the blocks list owns all the constructed page layout. int Tesseract::SegmentPage(const STRING* input_file, IMAGE* image, BLOCK_LIST* blocks) { int width = image->get_xsize(); int height = image->get_ysize(); int resolution = image->get_res(); #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) { width = pixGetWidth(pix_binary_); height = pixGetHeight(pix_binary_); resolution = pixGetXRes(pix_binary_); } #endif // Zero resolution messes up the algorithms, so make sure it is credible. if (resolution < kMinCredibleResolution) resolution = kDefaultResolution; // Get page segmentation mode. PageSegMode pageseg_mode = static_cast<PageSegMode>( static_cast<int>(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. if (pageseg_mode != tesseract::PSM_AUTO && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); if (lastdot != NULL) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } bool single_column = pageseg_mode > PSM_AUTO; if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_COLUMN. pageseg_mode = PSM_SINGLE_COLUMN; } TO_BLOCK_LIST land_blocks, port_blocks; TBOX page_box; if (pageseg_mode <= PSM_SINGLE_COLUMN) { if (AutoPageSeg(width, height, resolution, single_column, image, blocks, &port_blocks) < 0) { return -1; } // To create blobs from the image region bounds uncomment this line: // port_blocks.clear(); // Uncomment to go back to the old mode. 
} else { #if HAVE_LIBLEPT image->FromPix(pix_binary_); #endif deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); } if (blocks->empty()) { tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } if (port_blocks.empty()) { // AutoPageSeg was not used, so we need to find_components first. find_components(blocks, &land_blocks, &port_blocks, &page_box); } else { // AutoPageSeg does not need to find_components as it did that already. page_box.set_left(0); page_box.set_bottom(0); page_box.set_right(width); page_box.set_top(height); // Filter_blobs sets up the TO_BLOCKs the same as find_components does. filter_blobs(page_box.topright(), &port_blocks, true); } TO_BLOCK_IT to_block_it(&port_blocks); ASSERT_HOST(!port_blocks.empty()); TO_BLOCK* to_block = to_block_it.data(); if (pageseg_mode <= PSM_SINGLE_BLOCK || to_block->line_size < 2) { // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old // textord. The difference is the number of blocks and how the are made. textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks, this); } else { // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. float gradient = make_single_row(page_box.topright(), to_block, &port_blocks, this); if (pageseg_mode == PSM_SINGLE_LINE) { // SINGLE_LINE uses the old word maker on the single line. make_words(page_box.topright(), gradient, blocks, &land_blocks, &port_blocks, this); } else { // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a // single word, and in SINGLE_CHAR mode, all the outlines // go in a single blob. make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(), to_block->block->row_list()); } } return 0; }
// Main program to combine/extract/overwrite tessdata components // in [lang].traineddata files. // // To combine all the individual tessdata components (unicharset, DAWGs, // classifier templates, ambiguities, language configs) located at, say, // /home/$USER/temp/eng.* run: // // combine_tessdata /home/$USER/temp/eng. // // The result will be a combined tessdata file /home/$USER/temp/eng.traineddata // // Specify option -e if you would like to extract individual components // from a combined traineddata file. For DC, to extract language config // file and the unicharset from tessdata/eng.traineddata run: // // combine_tessdata -e tessdata/eng.traineddata // /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset // // The desired config file and unicharset will be written to // /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset // // Specify option -o to overwrite individual components of the given // [lang].traineddata file. For DC, to overwrite language config // and unichar ambiguities files in tessdata/eng.traineddata use: // // combine_tessdata -o tessdata/eng.traineddata // /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs // // As a result, tessdata/eng.traineddata will contain the new language config // and unichar ambigs, plus all the original DAWGs, classifier teamples, etc. // // Note: the file names of the files to extract to and to overwrite from should // have the appropriate file suffixes (extensions) indicating their tessdata // component type (.unicharset for the unicharset, .unicharambigs for unichar // ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h. // // Specify option -u to unpack all the components to the specified path: // // combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng. // // This will create /home/$USER/temp/eng.* files with individual tessdata // components from tessdata/eng.traineddata. 
// int main(int argc, char **argv) { int i; if (argc == 2) { printf("Combining tessdata files\n"); STRING lang = argv[1]; char* last = &argv[1][strlen(argv[1])-1]; if (*last != '.') lang += '.'; STRING output_file = lang; output_file += kTrainedDataSuffix; if (!tesseract::TessdataManager::CombineDataFiles( lang.string(), output_file.string())) { printf("Error combining tessdata files into %s\n", output_file.string()); } else { printf("Output %s created sucessfully.\n", output_file.string()); } } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) { // Initialize TessdataManager with the data in the given traineddata file. tesseract::TessdataManager tm; tm.Init(argv[2], 0); printf("Extracting tessdata components from %s\n", argv[2]); if (strcmp(argv[1], "-e") == 0) { for (i = 3; i < argc; ++i) { if (tm.ExtractToFile(argv[i])) { printf("Wrote %s\n", argv[i]); } else { printf("Not extracting %s, since this component" " is not present\n", argv[i]); } } } else { // extract all the components for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) { STRING filename = argv[3]; char* last = &argv[3][strlen(argv[3])-1]; if (*last != '.') filename += '.'; filename += tesseract::kTessdataFileSuffixes[i]; if (tm.ExtractToFile(filename.string())) { printf("Wrote %s\n", filename.string()); } } } tm.End(); } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) { // Rename the current traineddata file to a temporary name. const char *new_traineddata_filename = argv[2]; STRING traineddata_filename = new_traineddata_filename; traineddata_filename += ".__tmp__"; if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) { tprintf("Failed to create a temporary file %s\n", traineddata_filename.string()); exit(1); } // Initialize TessdataManager with the data in the given traineddata file. tesseract::TessdataManager tm; tm.Init(traineddata_filename.string(), 0); // Write the updated traineddata file. 
tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3); tm.End(); } else { printf("Usage for combining tessdata components:\n" " %s language_data_path_prefix\n" " (e.g. %s tessdata/eng.)\n\n", argv[0], argv[0]); printf("Usage for extracting tessdata components:\n" " %s -e traineddata_file [output_component_file...]\n" " (e.g. %s -e eng.traineddata eng.unicharset)\n\n", argv[0], argv[0]); printf("Usage for overwriting tessdata components:\n" " %s -o traineddata_file [input_component_file...]\n" " (e.g. %s -o eng.traineddata eng.unicharset)\n\n", argv[0], argv[0]); printf("Usage for unpacking all tessdata components:\n" " %s -u traineddata_file output_path_prefix\n" " (e.g. %s -u eng.traineddata tmp/eng.)\n", argv[0], argv[0]); return 1; } }
// Main program to combine/extract/overwrite tessdata components // in [lang].traineddata files. // // To combine all the individual tessdata components (unicharset, DAWGs, // classifier templates, ambiguities, language configs) located at, say, // /home/$USER/temp/eng.* run: // // combine_tessdata /home/$USER/temp/eng. // // The result will be a combined tessdata file /home/$USER/temp/eng.traineddata // // Specify option -e if you would like to extract individual components // from a combined traineddata file. For example, to extract language config // file and the unicharset from tessdata/eng.traineddata run: // // combine_tessdata -e tessdata/eng.traineddata // /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset // // The desired config file and unicharset will be written to // /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset // // Specify option -o to overwrite individual components of the given // [lang].traineddata file. For example, to overwrite language config // and unichar ambiguities files in tessdata/eng.traineddata use: // // combine_tessdata -o tessdata/eng.traineddata // /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs // // As a result, tessdata/eng.traineddata will contain the new language config // and unichar ambigs, plus all the original DAWGs, classifier teamples, etc. // // Note: the file names of the files to extract to and to overwrite from should // have the appropriate file suffixes (extensions) indicating their tessdata // component type (.unicharset for the unicharset, .unicharambigs for unichar // ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h. // // Specify option -u to unpack all the components to the specified path: // // combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng. // // This will create /home/$USER/temp/eng.* files with individual tessdata // components from tessdata/eng.traineddata. 
// int main(int argc, char **argv) { tesseract::CheckSharedLibraryVersion(); int i; tesseract::TessdataManager tm; if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) { printf("%s\n", tesseract::TessBaseAPI::Version()); return EXIT_SUCCESS; } else if (argc == 2) { printf("Combining tessdata files\n"); STRING lang = argv[1]; char* last = &argv[1][strlen(argv[1])-1]; if (*last != '.') lang += '.'; STRING output_file = lang; output_file += kTrainedDataSuffix; if (!tm.CombineDataFiles(lang.string(), output_file.string())) { printf("Error combining tessdata files into %s\n", output_file.string()); } else { printf("Output %s created successfully.\n", output_file.string()); } } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) { // Initialize TessdataManager with the data in the given traineddata file. if (!tm.Init(argv[2])) { tprintf("Failed to read %s\n", argv[2]); return EXIT_FAILURE; } printf("Extracting tessdata components from %s\n", argv[2]); if (strcmp(argv[1], "-e") == 0) { for (i = 3; i < argc; ++i) { errno = 0; if (tm.ExtractToFile(argv[i])) { printf("Wrote %s\n", argv[i]); } else if (errno == 0) { printf("Not extracting %s, since this component" " is not present\n", argv[i]); return EXIT_FAILURE; } else { printf("Error, could not extract %s: %s\n", argv[i], strerror(errno)); return EXIT_FAILURE; } } } else { // extract all the components for (i = 0; i < tesseract::TESSDATA_NUM_ENTRIES; ++i) { STRING filename = argv[3]; char* last = &argv[3][strlen(argv[3])-1]; if (*last != '.') filename += '.'; filename += tesseract::kTessdataFileSuffixes[i]; errno = 0; if (tm.ExtractToFile(filename.string())) { printf("Wrote %s\n", filename.string()); } else if (errno != 0) { printf("Error, could not extract %s: %s\n", filename.string(), strerror(errno)); return EXIT_FAILURE; } } } } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) { // Rename the current traineddata file to a temporary name. 
const char *new_traineddata_filename = argv[2]; STRING traineddata_filename = new_traineddata_filename; traineddata_filename += ".__tmp__"; if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) { tprintf("Failed to create a temporary file %s\n", traineddata_filename.string()); return EXIT_FAILURE; } // Initialize TessdataManager with the data in the given traineddata file. tm.Init(traineddata_filename.string()); // Write the updated traineddata file. tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3); } else if (argc == 3 && strcmp(argv[1], "-c") == 0) { if (!tm.Init(argv[2])) { tprintf("Failed to read %s\n", argv[2]); return EXIT_FAILURE; } tesseract::TFile fp; if (!tm.GetComponent(tesseract::TESSDATA_LSTM, &fp)) { tprintf("No LSTM Component found in %s!\n", argv[2]); return EXIT_FAILURE; } tesseract::LSTMRecognizer recognizer; if (!recognizer.DeSerialize(&tm, &fp)) { tprintf("Failed to deserialize LSTM in %s!\n", argv[2]); return EXIT_FAILURE; } recognizer.ConvertToInt(); GenericVector<char> lstm_data; fp.OpenWrite(&lstm_data); ASSERT_HOST(recognizer.Serialize(&tm, &fp)); tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], lstm_data.size()); if (!tm.SaveFile(argv[2], nullptr)) { tprintf("Failed to write modified traineddata:%s!\n", argv[2]); return EXIT_FAILURE; } } else if (argc == 3 && strcmp(argv[1], "-d") == 0) { // Initialize TessdataManager with the data in the given traineddata file. tm.Init(argv[2]); } else { printf("Usage for combining tessdata components:\n" " %s language_data_path_prefix\n" " (e.g. %s tessdata/eng.)\n\n", argv[0], argv[0]); printf("Usage for extracting tessdata components:\n" " %s -e traineddata_file [output_component_file...]\n" " (e.g. %s -e eng.traineddata eng.unicharset)\n\n", argv[0], argv[0]); printf("Usage for overwriting tessdata components:\n" " %s -o traineddata_file [input_component_file...]\n" " (e.g. 
%s -o eng.traineddata eng.unicharset)\n\n", argv[0], argv[0]); printf("Usage for unpacking all tessdata components:\n" " %s -u traineddata_file output_path_prefix\n" " (e.g. %s -u eng.traineddata tmp/eng.)\n", argv[0], argv[0]); printf( "Usage for listing directory of components:\n" " %s -d traineddata_file\n", argv[0]); printf( "Usage for compacting LSTM component to int:\n" " %s -c traineddata_file\n", argv[0]); return 1; } tm.Directory(); return EXIT_SUCCESS; }
// This function takes tif/box pair of files and runs recognition on the image, // while making sure that the word bounds that tesseract identified roughly // match to those specified by the input box file. For each word (ngram in a // single bounding box from the input box file) it outputs the ocred result, // the correct label, rating and certainty. void Tesseract::recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) { STRING box_fname = fname; const char *lastdot = strrchr(box_fname.string(), '.'); if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0'; box_fname += ".box"; // read_next_box() will close box_file FILE *box_file = open_file(box_fname.string(), "r"); PAGE_RES_IT page_res_it; page_res_it.page_res = page_res; page_res_it.restart_page(); STRING label; // Process all the words on this page. TBOX tbox; // tesseract-identified box TBOX bbox; // box from the box file bool keep_going; int line_number = 0; int examined_words = 0; do { keep_going = read_t(&page_res_it, &tbox); keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox); // Align bottom left points of the TBOXes. while (keep_going && !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) { if (bbox.bottom() < tbox.bottom()) { page_res_it.forward(); keep_going = read_t(&page_res_it, &tbox); } else { keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox); } } while (keep_going && !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) { if (bbox.left() > tbox.left()) { page_res_it.forward(); keep_going = read_t(&page_res_it, &tbox); } else { keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox); } } // OCR the word if top right points of the TBOXes are similar. 
if (keep_going && NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) && NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) { ambigs_classify_and_output(label.string(), &page_res_it, output_file); examined_words++; } page_res_it.forward(); } while (keep_going); fclose(box_file); // Set up scripts on all of the words that did not get sent to // ambigs_classify_and_output. They all should have, but if all the // werd_res's don't get uch_sets, tesseract will crash when you try // to iterate over them. :-( int total_words = 0; for (page_res_it.restart_page(); page_res_it.block() != NULL; page_res_it.forward()) { if (page_res_it.word()) { if (page_res_it.word()->uch_set == NULL) page_res_it.word()->SetupFake(unicharset); total_words++; } } if (examined_words < 0.85 * total_words) { tprintf("TODO(antonova): clean up recog_training_segmented; " " It examined only a small fraction of the ambigs image.\n"); } tprintf("recog_training_segmented: examined %d / %d words.\n", examined_words, total_words); }
int main(int argc, char **argv) { #ifdef USE_NLS setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); #endif if ((argc == 2 && strcmp(argv[1], "-v") == 0) || (argc == 2 && strcmp(argv[1], "--version") == 0)) { fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); exit(0); } // Make the order of args a bit more forgiving than it used to be. const char* lang = "eng"; const char* image = NULL; const char* output = NULL; tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; int arg = 1; while (arg < argc && (output == NULL || argv[arg][0] == '-')) { if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) { lang = argv[arg + 1]; ++arg; } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) { pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1])); ++arg; } else if (image == NULL) { image = argv[arg]; } else if (output == NULL) { output = argv[arg]; } ++arg; } if (output == NULL) { fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] " "[-psm pagesegmode] [configfile...]\n"), argv[0]); fprintf(stderr, _("pagesegmode values are:\n" "0 = Orientation and script detection (OSD) only.\n" "1 = Automatic page segmentation with OSD.\n" "2 = Automatic page segmentation, but no OSD, or OCR\n" "3 = Fully automatic page segmentation, but no OSD. 
(Default)\n" "4 = Assume a single column of text of variable sizes.\n" "5 = Assume a single uniform block of vertically aligned text.\n" "6 = Assume a single uniform block of text.\n" "7 = Treat the image as a single text line.\n" "8 = Treat the image as a single word.\n" "9 = Treat the image as a single word in a circle.\n" "10 = Treat the image as a single character.\n")); fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any" "configfile.\n")); exit(1); } tesseract::TessBaseAPI api; api.SetOutputName(output); api.Init(argv[0], lang, tesseract::OEM_DEFAULT, &(argv[arg]), argc - arg, NULL, NULL, false); api.SetPageSegMode(pagesegmode); tprintf(_("Tesseract Open Source OCR Engine v%s with Leptonica\n"), tesseract::TessBaseAPI::Version()); FILE* fin = fopen(image, "rb"); if (fin == NULL) { printf("Cannot open input file: %s\n", image); exit(2); } fclose(fin); PIX *pixs; if ((pixs = pixRead(image)) == NULL) { printf("Unsupported image type.\n"); exit(3); } pixDestroy(&pixs); STRING text_out; if (!api.ProcessPages(image, NULL, 0, &text_out)) { tprintf(_("Error during processing.\n")); } bool output_hocr = false; api.GetBoolVariable("tessedit_create_hocr", &output_hocr); bool output_box = false; api.GetBoolVariable("tessedit_create_boxfile", &output_box); STRING outfile = output; outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt"; FILE* fout = fopen(outfile.string(), "wb"); if (fout == NULL) { tprintf(_("Cannot create output file %s\n"), outfile.string()); exit(1); } fwrite(text_out.string(), 1, text_out.length(), fout); fclose(fout); return 0; // Normal exit }
void ParseCommandLineFlags(const char* usage, int* argc, char*** argv, const bool remove_flags) { if (*argc == 1) { tprintf("USAGE: %s\n", usage); PrintCommandLineFlags(); exit(0); } unsigned int i = 1; for (i = 1; i < *argc; ++i) { const char* current_arg = (*argv)[i]; // If argument does not start with a hyphen then break. if (current_arg[0] != '-') { break; } // Position current_arg after startings hyphens. We treat a sequence of // consecutive hyphens of any length identically. while (*current_arg == '-') { ++current_arg; } // If this is asking for usage, print the help message and abort. if (!strcmp(current_arg, "help") || !strcmp(current_arg, "helpshort")) { tprintf("USAGE: %s\n", usage); PrintCommandLineFlags(); exit(0); } // Find the starting position of the value if it was specified in this // string. const char* equals_position = strchr(current_arg, '='); const char* rhs = NULL; if (equals_position != NULL) { rhs = equals_position + 1; } // Extract the flag name. STRING lhs; if (equals_position == NULL) { lhs = current_arg; } else { lhs.assign(current_arg, equals_position - current_arg); } if (!lhs.length()) { tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); exit(1); } // Find the flag name in the list of global flags. 
// inT32 flag inT32 int_val; if (IntFlagExists(lhs.string(), &int_val)) { if (rhs != NULL) { if (!strlen(rhs)) { // Bad input of the format --int_flag= tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); exit(1); } if (!SafeAtoi(rhs, &int_val)) { tprintf("ERROR: Could not parse int from %s in flag %s\n", rhs, (*argv)[i]); exit(1); } } else { // We need to parse the next argument if (i + 1 >= *argc) { tprintf("ERROR: Could not find value argument for flag %s\n", lhs.string()); exit(1); } else { ++i; if (!SafeAtoi((*argv)[i], &int_val)) { tprintf("ERROR: Could not parse inT32 from %s\n", (*argv)[i]); exit(1); } } } SetIntFlagValue(lhs.string(), int_val); continue; } // double flag double double_val; if (DoubleFlagExists(lhs.string(), &double_val)) { if (rhs != NULL) { if (!strlen(rhs)) { // Bad input of the format --double_flag= tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); exit(1); } if (!SafeAtod(rhs, &double_val)) { tprintf("ERROR: Could not parse double from %s in flag %s\n", rhs, (*argv)[i]); exit(1); } } else { // We need to parse the next argument if (i + 1 >= *argc) { tprintf("ERROR: Could not find value argument for flag %s\n", lhs.string()); exit(1); } else { ++i; if (!SafeAtod((*argv)[i], &double_val)) { tprintf("ERROR: Could not parse double from %s\n", (*argv)[i]); exit(1); } } } SetDoubleFlagValue(lhs.string(), double_val); continue; } // Bool flag. 
Allow input forms --flag (equivalent to --flag=true), // --flag=false, --flag=true, --flag=0 and --flag=1 bool bool_val; if (BoolFlagExists(lhs.string(), &bool_val)) { if (rhs == NULL) { // --flag form bool_val = true; } else { if (!strlen(rhs)) { // Bad input of the format --bool_flag= tprintf("ERROR: Bad argument: %s\n", (*argv)[i]); exit(1); } if (!strcmp(rhs, "false") || !strcmp(rhs, "0")) { bool_val = false; } else if (!strcmp(rhs, "true") || !strcmp(rhs, "1")) { bool_val = true; } else { tprintf("ERROR: Could not parse bool from flag %s\n", (*argv)[i]); exit(1); } } SetBoolFlagValue(lhs.string(), bool_val); continue; } // string flag const char* string_val; if (StringFlagExists(lhs.string(), &string_val)) { if (rhs != NULL) { string_val = rhs; } else { // Pick the next argument if (i + 1 >= *argc) { tprintf("ERROR: Could not find string value for flag %s\n", lhs.string()); exit(1); } else { string_val = (*argv)[++i]; } } SetStringFlagValue(lhs.string(), string_val); continue; } // Flag was not found. Exit with an error message. tprintf("ERROR: Non-existent flag %s\n", (*argv)[i]); exit(1); } // for each argv if (remove_flags) { (*argv)[i - 1] = (*argv)[0]; (*argv) += (i - 1); (*argc) -= (i - 1); } }
void edges_and_textord( //read .pb file const char *filename, //.pb file BLOCK_LIST *blocks) { BLOCK *block; //current block char *lastdot; //of name STRING name = filename; //truncated name ICOORD page_tr; BOX page_box; //bounding_box PDBLK_CLIST pd_blocks; //copy of list BLOCK_IT block_it = blocks; //iterator PDBLK_C_IT pd_it = &pd_blocks; //iterator //different orientations TO_BLOCK_LIST land_blocks, port_blocks; IMAGE thresh_image; //thresholded lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) *lastdot = '\0'; if (page_image.get_bpp () == 0) { name += tessedit_image_ext; if (page_image.read_header (name.string ())) CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ()); if (page_image.read (0)) READFAILED.error ("edges_and_textord", EXIT, name.string ()); name = filename; lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) *lastdot = '\0'; } page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ()); read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (), blocks); block_it.set_to_list (blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; if (page_image.get_bpp () > 1) { set_global_loc_code(LOC_ADAPTIVE); for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); pd_it.add_after_then_move (block); } // adaptive_threshold(&page_image,&pd_blocks,&thresh_image); set_global_loc_code(LOC_EDGE_PROG); #ifndef EMBEDDED previous_cpu = clock (); #endif for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); if (!polygon_tess_approximation) invert_image(&page_image); #ifndef GRAPHICS_DISABLED extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block); #else extract_edges(&page_image, &thresh_image, page_tr, block); #endif page_box += block->bounding_box (); } page_image = thresh_image; //everyone else gets it } else { set_global_loc_code(LOC_EDGE_PROG); if (!page_image.white_high ()) 
invert_image(&page_image); #ifndef EMBEDDED previous_cpu = clock (); #endif for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); #ifndef GRAPHICS_DISABLED extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block); #else extract_edges(&page_image, &page_image, page_tr, block); #endif page_box += block->bounding_box (); } } if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 10; } assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape); #ifndef EMBEDDED previous_cpu = clock (); #endif filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks); }
// Find all editable parameters used within tesseract and create a // SVMenuNode tree from it. // TODO (wanke): This is actually sort of hackish. SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { SVMenuNode* mr = new SVMenuNode(); ParamContent_LIST vclist; ParamContent_IT vc_it(&vclist); // Amount counts the number of entries for a specific char*. // TODO(rays) get rid of the use of std::map. std::map<const char*, int> amount; // Add all parameters to a list. int v, i; int num_iterations = (tess->params() == NULL) ? 1 : 2; for (v = 0; v < num_iterations; ++v) { tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params(); for (i = 0; i < vec->int_params.size(); ++i) { vc_it.add_after_then_move(new ParamContent(vec->int_params[i])); } for (i = 0; i < vec->bool_params.size(); ++i) { vc_it.add_after_then_move(new ParamContent(vec->bool_params[i])); } for (i = 0; i < vec->string_params.size(); ++i) { vc_it.add_after_then_move(new ParamContent(vec->string_params[i])); } for (i = 0; i < vec->double_params.size(); ++i) { vc_it.add_after_then_move(new ParamContent(vec->double_params[i])); } } // Count the # of entries starting with a specific prefix. for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { ParamContent* vc = vc_it.data(); STRING tag; STRING tag2; STRING tag3; GetPrefixes(vc->GetName(), &tag, &tag2, &tag3); amount[tag.string()]++; amount[tag2.string()]++; amount[tag3.string()]++; } vclist.sort(ParamContent::Compare); // Sort the list alphabetically. SVMenuNode* other = mr->AddChild("OTHER"); // go through the list again and this time create the menu structure. 
vc_it.move_to_first(); for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) { ParamContent* vc = vc_it.data(); STRING tag; STRING tag2; STRING tag3; GetPrefixes(vc->GetName(), &tag, &tag2, &tag3); if (amount[tag.string()] == 1) { other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(), vc->GetDescription()); } else { // More than one would use this submenu -> create submenu. SVMenuNode* sv = mr->AddChild(tag.string()); if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) || (amount[tag2.string()] <= 1)) { sv->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(), vc->GetDescription()); } else { // Make subsubmenus. SVMenuNode* sv2 = sv->AddChild(tag2.string()); sv2->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(), vc->GetDescription()); } } } return mr; }
int main(int argc, char **argv) { if ((argc == 2 && strcmp(argv[1], "-v") == 0) || (argc == 2 && strcmp(argv[1], "--version") == 0)) { char *versionStrP; fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); versionStrP = getLeptonicaVersion(); fprintf(stderr, " %s\n", versionStrP); lept_free(versionStrP); versionStrP = getImagelibVersions(); fprintf(stderr, " %s\n", versionStrP); lept_free(versionStrP); #ifdef USE_OPENCL cl_platform_id platform; cl_uint num_platforms; cl_device_id devices[2]; cl_uint num_devices; char info[256]; int i; fprintf(stderr, " OpenCL info:\n"); clGetPlatformIDs(1, &platform, &num_platforms); fprintf(stderr, " Found %d platforms.\n", num_platforms); clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0); fprintf(stderr, " Platform name: %s.\n", info); clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0); fprintf(stderr, " Version: %s.\n", info); clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices); fprintf(stderr, " Found %d devices.\n", num_devices); for (i = 0; i < num_devices; ++i) { clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); fprintf(stderr, " Device %d name: %s.\n", i+1, info); } #endif exit(0); } // Make the order of args a bit more forgiving than it used to be. 
const char* lang = "eng"; const char* image = NULL; const char* output = NULL; const char* datapath = NULL; bool noocr = false; bool list_langs = false; bool print_parameters = false; tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; int arg = 1; while (arg < argc && (output == NULL || argv[arg][0] == '-')) { if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) { lang = argv[arg + 1]; ++arg; } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) { datapath = argv[arg + 1]; ++arg; } else if (strcmp(argv[arg], "--list-langs") == 0) { noocr = true; list_langs = true; } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) { pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1])); ++arg; } else if (strcmp(argv[arg], "--print-parameters") == 0) { noocr = true; print_parameters = true; } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { // handled properly after api init ++arg; } else if (image == NULL) { image = argv[arg]; } else if (output == NULL) { output = argv[arg]; } ++arg; } if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) { list_langs = true; noocr = true; } if (output == NULL && noocr == false) { fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout " "[options...] [configfile...]\n\n", argv[0]); fprintf(stderr, "OCR options:\n"); fprintf(stderr, " --tessdata-dir /path\tspecify location of tessdata" " path\n"); fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n"); fprintf(stderr, " -c configvar=value\tset value for control parameter.\n" "\t\t\tMultiple -c arguments are allowed.\n"); fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n"); fprintf(stderr, "These options must occur before any configfile.\n\n"); fprintf(stderr, "pagesegmode values are:\n" " 0 = Orientation and script detection (OSD) only.\n" " 1 = Automatic page segmentation with OSD.\n" " 2 = Automatic page segmentation, but no OSD, or OCR\n" " 3 = Fully automatic page segmentation, but no OSD. 
(Default)\n" " 4 = Assume a single column of text of variable sizes.\n" " 5 = Assume a single uniform block of vertically aligned text.\n" " 6 = Assume a single uniform block of text.\n" " 7 = Treat the image as a single text line.\n" " 8 = Treat the image as a single word.\n" " 9 = Treat the image as a single word in a circle.\n" " 10 = Treat the image as a single character.\n\n"); fprintf(stderr, "Single options:\n"); fprintf(stderr, " -v --version: version info\n"); fprintf(stderr, " --list-langs: list available languages for tesseract " "engine. Can be used with --tessdata-dir.\n"); fprintf(stderr, " --print-parameters: print tesseract parameters to the " "stdout.\n"); exit(1); } if (output != NULL && strcmp(output, "-") && strcmp(output, "stdout")) { tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", tesseract::TessBaseAPI::Version()); } PERF_COUNT_START("Tesseract:main") tesseract::TessBaseAPI api; api.SetOutputName(output); int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT, &(argv[arg]), argc - arg, NULL, NULL, false); if (rc) { fprintf(stderr, "Could not initialize tesseract.\n"); exit(1); } char opt1[255], opt2[255]; for (arg = 0; arg < argc; arg++) { if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { strncpy(opt1, argv[arg + 1], 255); *(strchr(opt1, '=')) = 0; strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255); opt2[254] = 0; ++arg; if (!api.SetVariable(opt1, opt2)) { fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); } } } if (list_langs) { GenericVector<STRING> languages; api.GetAvailableLanguagesAsVector(&languages); fprintf(stderr, "List of available languages (%d):\n", languages.size()); for (int index = 0; index < languages.size(); ++index) { STRING& string = languages[index]; fprintf(stderr, "%s\n", string.string()); } api.End(); exit(0); } if (print_parameters) { FILE* fout = stdout; fprintf(stdout, "Tesseract parameters:\n"); api.PrintVariables(fout); api.End(); exit(0); } // We have 2 possible sources of 
pagesegmode: a config file and // the command line. For backwards compatability reasons, the // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the // default for this program is tesseract::PSM_AUTO. We will let // the config file take priority, so the command-line default // can take priority over the tesseract default, so we use the // value from the command line only if the retrieved mode // is still tesseract::PSM_SINGLE_BLOCK, indicating no change // in any config file. Therefore the only way to force // tesseract::PSM_SINGLE_BLOCK is from the command line. // It would be simpler if we could set the value before Init, // but that doesn't work. if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) api.SetPageSegMode(pagesegmode); bool stdInput = !strcmp(image, "stdin") || !strcmp(image, "-"); Pix* pixs = NULL; if (stdInput) { char byt; GenericVector<l_uint8> ch_data; std::istream file(std::cin.rdbuf()); #ifdef WIN32 if (_setmode(_fileno(stdin), _O_BINARY) == -1) tprintf("ERROR: cin to binary: %s", strerror(errno)); #endif // WIN32 while (file.get(byt)) { ch_data.push_back(byt); } std::cin.ignore(std::cin.rdbuf()->in_avail() + 1); pixs = pixReadMem(&ch_data[0], ch_data.size()); } if (pagesegmode == tesseract::PSM_AUTO_ONLY || pagesegmode == tesseract::PSM_OSD_ONLY) { int ret_val = 0; if (!pixs) pixs = pixRead(image); if (!pixs) { fprintf(stderr, "Cannot open input file: %s\n", image); exit(2); } api.SetImage(pixs); if (pagesegmode == tesseract::PSM_OSD_ONLY) { OSResults osr; if (api.DetectOS(&osr)) { int orient = osr.best_result.orientation_id; int script_id = osr.get_best_script(orient); float orient_oco = osr.best_result.oconfidence; float orient_sco = osr.best_result.sconfidence; tprintf("Orientation: %d\nOrientation in degrees: %d\n" \ "Orientation confidence: %.2f\n" \ "Script: %d\nScript confidence: %.2f\n", orient, OrientationIdToValue(orient), orient_oco, script_id, orient_sco); } else { ret_val = 1; } } else { tesseract::Orientation 
orientation; tesseract::WritingDirection direction; tesseract::TextlineOrder order; float deskew_angle; tesseract::PageIterator* it = api.AnalyseLayout(); if (it) { it->Orientation(&orientation, &direction, &order, &deskew_angle); tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \ "Deskew angle: %.4f\n", orientation, direction, order, deskew_angle); } else { ret_val = 1; } delete it; } pixDestroy(&pixs); exit(ret_val); } tesseract::TessResultRenderer* renderer = NULL; bool b; api.GetBoolVariable("tessedit_create_hocr", &b); if (b && renderer == NULL) renderer = new tesseract::TessHOcrRenderer(); api.GetBoolVariable("tessedit_create_pdf", &b); if (b && renderer == NULL) renderer = new tesseract::TessPDFRenderer(api.GetDatapath()); api.GetBoolVariable("tessedit_create_boxfile", &b); if (b && renderer == NULL) renderer = new tesseract::TessBoxTextRenderer(); if (renderer == NULL) renderer = new tesseract::TessTextRenderer(); if (pixs) { api.ProcessPage(pixs, 0, NULL, NULL, 0, renderer); pixDestroy(&pixs); } else { FILE* fin = fopen(image, "rb"); if (fin == NULL) { fprintf(stderr, "Cannot open input file: %s\n", image); exit(2); } fclose(fin); if (!api.ProcessPages(image, NULL, 0, renderer)) { fprintf(stderr, "Error during processing.\n"); exit(1); } } FILE* fout = stdout; if (strcmp(output, "-") && strcmp(output, "stdout")) { STRING outfile = STRING(output) + STRING(".") + STRING(renderer->file_extension()); fout = fopen(outfile.string(), "wb"); if (fout == NULL) { fprintf(stderr, "Cannot create output file %s\n", outfile.string()); exit(1); } } const char* data; inT32 data_len; if (renderer->GetOutput(&data, &data_len)) { fwrite(data, 1, data_len, fout); if (fout != stdout) fclose(fout); else clearerr(fout); } PERF_COUNT_END return 0; // Normal exit }
/**
 * save_summary()
 *
 * Writes the run statistics to the file <imagefile>.sta: elapsed time,
 * character and word counts, classification overhead, the chop and
 * segmentation counters of pass 1 and pass 2, and the states_before_best
 * tally. The best-certainty tallies are printed with cprintf() rather than
 * into the file. The whole body is compiled out when SECURE_NAMES is
 * defined.
 *
 * elapsed_time  number of seconds to report on the first line
 */
void Wordrec::save_summary(inT32 elapsed_time) {
#ifndef SECURE_NAMES
  STRING sta_name;               // name of the summary file
  FILE *out;                     // the summary file
  int bucket;                    // index into a tally
  int states_total;              // weighted sum of states_before_best

  sta_name = imagefile + ".sta";
  out = open_file (sta_name.string(), "w");

  // Overall counts.
  fprintf (out, INT32FORMAT " seconds elapsed\n", elapsed_time);
  fprintf (out, "\n");

  fprintf (out, "%d characters\n", character_count);
  fprintf (out, "%d words\n", word_count);
  fprintf (out, "\n");

  fprintf (out, "%d permutations performed\n", permutation_count);
  fprintf (out, "%d characters classified\n", chars_classified);
  fprintf (out, "%4.0f%% classification overhead\n",
           (float) chars_classified / character_count * 100.0 - 100.0);
  fprintf (out, "\n");

  // Pass 1.
  fprintf (out, "%d words chopped (pass 1) ", words_chopped1);
  fprintf (out, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100);
  fprintf (out, "%d chops performed\n", chops_performed1);
  fprintf (out, "%d chops attempted\n", chops_attempted1);
  fprintf (out, "\n");

  fprintf (out, "%d words joined (pass 1)", words_segmented1);
  fprintf (out, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100);
  fprintf (out, "%d segmentation states\n", segmentation_states1);
  fprintf (out, "%d segmentations timed out\n", states_timed_out1);
  fprintf (out, "\n");

  // Pass 2.
  fprintf (out, "%d words chopped (pass 2) ", words_chopped2);
  fprintf (out, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100);
  fprintf (out, "%d chops performed\n", chops_performed2);
  fprintf (out, "%d chops attempted\n", chops_attempted2);
  fprintf (out, "\n");

  fprintf (out, "%d words joined (pass 2)", words_segmented2);
  fprintf (out, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100);
  fprintf (out, "%d segmentation states\n", segmentation_states2);
  fprintf (out, "%d segmentations timed out\n", states_timed_out2);
  fprintf (out, "\n");

  // Sum each states_before_best entry weighted by its index.
  states_total = 0;
  iterate_tally (states_before_best, bucket) {
    states_total += (tally_entry (states_before_best, bucket) * bucket);
  }
  fprintf (out, "segmentations (before best) = %d\n", states_total);
  // The overhead line is left out when the sum is zero.
  if (states_total != 0)
    fprintf (out, "%4.0f%% segmentation overhead\n",
             (float) (segmentation_states1 + segmentation_states2) /
             states_total * 100.0 - 100.0);
  fprintf (out, "\n");

  print_tally (out, "segmentations (before best)", states_before_best);

  // One line per bucket, showing the entries of both best_certainties
  // tallies side by side.
  iterate_tally (best_certainties[0], bucket) {
    cprintf ("best certainty of %8.4f = %4d %4d\n",
             bucket * CERTAINTY_BUCKET_SIZE,
             tally_entry (best_certainties[0], bucket),
             tally_entry (best_certainties[1], bucket));
  }

  PrintIntMatcherStats(out);
  dj_statistics(out);
  fclose(out);
#endif
}