// Displays the segmentation state of *this (if not the same as the last
// one displayed) and waits for a click in the window.
void WERD_CHOICE::DisplaySegmentation(TWERD* word) {
#ifndef GRAPHICS_DISABLED
  // Number of different colors to draw with.
  const int kNumColors = 6;
  static ScrollView *segm_window = NULL;
  // Check the state against the static prev_drawn_state.
  static GenericVector<int> prev_drawn_state;
  bool already_done = prev_drawn_state.size() == length_;
  if (!already_done) prev_drawn_state.init_to_size(length_, 0);
  for (int i = 0; i < length_; ++i) {
    if (prev_drawn_state[i] != state_[i]) {
      already_done = false;
    }
    prev_drawn_state[i] = state_[i];
  }
  if (already_done || word->blobs.empty()) return;

  // Create the window if needed.
  if (segm_window == NULL) {
    segm_window = new ScrollView("Segmentation", 5, 10, 500, 256,
                                 2000.0, 256.0, true);
  } else {
    segm_window->Clear();
  }

  TBOX bbox;
  int blob_index = 0;
  for (int c = 0; c < length_; ++c) {
    ScrollView::Color color =
        static_cast<ScrollView::Color>(c % kNumColors + 3);
    for (int i = 0; i < state_[c]; ++i, ++blob_index) {
      TBLOB* blob = word->blobs[blob_index];
      bbox += blob->bounding_box();
      blob->plot(segm_window, color, color);
    }
  }
  segm_window->ZoomToRectangle(bbox.left(), bbox.top(),
                               bbox.right(), bbox.bottom());
  segm_window->Update();
  window_wait(segm_window);
#endif
}
/**
 * @name test_insert_seam
 *
 * @returns true if insert_seam will succeed.
 */
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
                      TWERD *word, int index) {
  SEAM *test_seam;
  int list_length = seam_array.size();
  for (int test_index = 0; test_index < index; ++test_index) {
    test_seam = seam_array[test_index];
    if (test_index + test_seam->widthp < index &&
        test_seam->widthp + test_index == index - 1 &&
        account_splits(test_seam, word, test_index + 1, 1) < 0)
      return false;
  }
  for (int test_index = index; test_index < list_length; test_index++) {
    test_seam = seam_array[test_index];
    if (test_index - test_seam->widthn >= index &&
        test_index - test_seam->widthn == index &&
        account_splits(test_seam, word, test_index + 1, -1) < 0)
      return false;
  }
  return true;
}
// Accumulates counts for junk. Counts only whether the junk was correctly
// rejected or not.
void ErrorCounter::AccumulateJunk(const ShapeTable& shape_table,
                                  const GenericVector<ShapeRating>& results,
                                  TrainingSample* sample) {
  // For junk we accept no answer, or an explicit shape answer matching the
  // class id of the sample.
  int num_results = results.size();
  int font_id = sample->font_id();
  int unichar_id = sample->class_id();
  if (num_results > 0 &&
      !shape_table.GetShape(results[0].shape_id).ContainsUnichar(unichar_id)) {
    // This is a junk error.
    ++font_counts_[font_id].n[CT_ACCEPTED_JUNK];
    sample->set_is_error(true);
    // It counts as an error for boosting too so sum the weight.
    scaled_error_ += sample->weight();
  } else {
    // Correctly rejected.
    ++font_counts_[font_id].n[CT_REJECTED_JUNK];
    sample->set_is_error(false);
  }
}
// Computes the features used by the subset of samples defined by
// the iterator and sets up the feature mapping.
// Returns the size of the compacted feature space.
int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) {
  feature_map_.Init(feature_space_.Size(), false);
  int total_samples = 0;
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    const TrainingSample& sample = it->GetSample();
    GenericVector<int> features;
    feature_space_.IndexAndSortFeatures(sample.features(),
                                        sample.num_features(),
                                        &features);
    int num_features = features.size();
    for (int f = 0; f < num_features; ++f)
      feature_map_.SetMap(features[f], true);
    ++total_samples;
  }
  feature_map_.Setup();
  compact_size_ = feature_map_.CompactSize();
  mapping_changed_ = true;
  FinalizeMapping(it);
  tprintf("%d non-zero features found in %d samples\n",
          compact_size_, total_samples);
  return compact_size_;
}
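// The following is a stand-alone sketch (not part of the Tesseract API; the
// sample data and names are hypothetical) of the idea behind the mapping set
// up above: scan every sample, record which sparse feature ids actually
// occur, and give each of them a compact index.
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Each inner vector stands in for the sorted feature ids of one sample.
  std::vector<std::vector<int> > samples = {{3, 17, 42}, {3, 99}, {7, 17, 42}};
  std::map<int, int> compact;  // sparse feature id -> compact feature id
  for (const std::vector<int>& features : samples) {
    for (int f : features) {
      if (compact.count(f) == 0) {
        int next_id = static_cast<int>(compact.size());
        compact[f] = next_id;  // ids assigned in first-seen order here
      }
    }
  }
  std::printf("%d non-zero features found in %d samples\n",
              static_cast<int>(compact.size()),
              static_cast<int>(samples.size()));
  return 0;
}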
// Displays the labels and cuts at the corresponding xcoords.
// Size of labels should match xcoords.
void LSTMRecognizer::DisplayLSTMOutput(const GenericVector<int>& labels,
                                       const GenericVector<int>& xcoords,
                                       int height, ScrollView* window) {
#ifndef GRAPHICS_DISABLED  // do nothing if there's no graphics
  int x_scale = network_->XScaleFactor();
  window->TextAttributes("Arial", height / 4, false, false, false);
  int end = 1;
  for (int start = 0; start < labels.size(); start = end) {
    int xpos = xcoords[start] * x_scale;
    if (labels[start] == null_char_) {
      end = start + 1;
      window->Pen(ScrollView::RED);
    } else {
      window->Pen(ScrollView::GREEN);
      const char* str = DecodeLabel(labels, start, &end, NULL);
      if (*str == '\\') str = "\\\\";
      xpos = xcoords[(start + end) / 2] * x_scale;
      window->Text(xpos, height, str);
    }
    window->Line(xpos, 0, xpos, height * 3 / 2);
  }
  window->Update();
#endif  // GRAPHICS_DISABLED
}
// Prints debug output detailing the activation path that is implied by the
// label_coords.
void LSTMRecognizer::DebugActivationPath(const NetworkIO& outputs,
                                         const GenericVector<int>& labels,
                                         const GenericVector<int>& xcoords) {
  if (xcoords[0] > 0)
    DebugActivationRange(outputs, "<null>", null_char_, 0, xcoords[0]);
  int end = 1;
  for (int start = 0; start < labels.size(); start = end) {
    if (labels[start] == null_char_) {
      end = start + 1;
      DebugActivationRange(outputs, "<null>", null_char_, xcoords[start],
                           xcoords[end]);
      continue;
    } else {
      int decoded;
      const char* label = DecodeLabel(labels, start, &end, &decoded);
      DebugActivationRange(outputs, label, labels[start], xcoords[start],
                           xcoords[start + 1]);
      for (int i = start + 1; i < end; ++i) {
        DebugActivationRange(outputs, DecodeSingleLabel(labels[i]), labels[i],
                             xcoords[i], xcoords[i + 1]);
      }
    }
  }
}
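// For orientation, a stand-alone sketch (plain vectors, not the
// LSTMRecognizer API) of the CTC-style convention the label walks above rely
// on: repeated labels collapse to one symbol and the null/blank label is
// dropped. DecodeLabel() above does the real, unicharset-aware version of
// this; the blank id and label values below are hypothetical.
#include <cstdio>
#include <vector>

static std::vector<int> CollapseCTC(const std::vector<int>& labels, int blank) {
  std::vector<int> out;
  int prev = blank;
  for (int label : labels) {
    if (label != blank && label != prev) out.push_back(label);
    prev = label;
  }
  return out;
}

int main() {
  std::vector<int> labels = {0, 0, 7, 7, 0, 3, 3, 0};  // 0 acts as the blank
  for (int id : CollapseCTC(labels, 0)) std::printf("%d ", id);
  std::printf("\n");  // prints: 7 3
  return 0;
}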
int main(int argc, char **argv) {
  if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
      (argc == 2 && strcmp(argv[1], "--version") == 0)) {
    char *versionStrP;
    fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
    versionStrP = getLeptonicaVersion();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);
    versionStrP = getImagelibVersions();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);
    exit(0);
  }

  // Make the order of args a bit more forgiving than it used to be.
  const char* lang = "eng";
  const char* image = NULL;
  const char* output = NULL;
  bool noocr = false;
  bool list_langs = false;
  bool print_parameters = false;
  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;

  int arg = 1;
  while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
    if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
      lang = argv[arg + 1];
      ++arg;
    } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
      pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
      ++arg;
    } else if (strcmp(argv[arg], "--print-parameters") == 0) {
      noocr = true;
      print_parameters = true;
    } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
      // handled properly after api init
      ++arg;
    } else if (image == NULL) {
      image = argv[arg];
    } else if (output == NULL) {
      output = argv[arg];
    }
    ++arg;
  }

  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
    list_langs = true;
    noocr = true;
  }

  if (output == NULL && noocr == false) {
    fprintf(stderr, "Usage:%s imagename outputbase|stdout [-l lang] "
            "[-psm pagesegmode] [-c configvar=value] "
            "[configfile...]\n\n", argv[0]);
    fprintf(stderr,
            "pagesegmode values are:\n"
            "0 = Orientation and script detection (OSD) only.\n"
            "1 = Automatic page segmentation with OSD.\n"
            "2 = Automatic page segmentation, but no OSD, or OCR\n"
            "3 = Fully automatic page segmentation, but no OSD. (Default)\n"
            "4 = Assume a single column of text of variable sizes.\n"
            "5 = Assume a single uniform block of vertically aligned text.\n"
            "6 = Assume a single uniform block of text.\n"
            "7 = Treat the image as a single text line.\n"
            "8 = Treat the image as a single word.\n"
            "9 = Treat the image as a single word in a circle.\n"
            "10 = Treat the image as a single character.\n");
    fprintf(stderr, "multiple -c arguments are allowed.\n");
    fprintf(stderr, "-l lang, -psm pagesegmode and any -c options must occur "
            "before any configfile.\n\n");
    fprintf(stderr, "Single options:\n");
    fprintf(stderr, " -v --version: version info\n");
    fprintf(stderr, " --list-langs: list available languages for tesseract "
            "engine\n");
    fprintf(stderr, " --print-parameters: print tesseract parameters to the "
            "stdout\n");
    exit(1);
  }

  tesseract::TessBaseAPI api;

  STRING tessdata_dir;
  truncate_path(argv[0], &tessdata_dir);
  api.SetOutputName(output);
  int rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT,
                    &(argv[arg]), argc - arg, NULL, NULL, false);
  if (rc) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
  }

  char opt1[255], opt2[255];
  for (arg = 0; arg < argc; arg++) {
    if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
      strncpy(opt1, argv[arg + 1], 255);
      char *p = strchr(opt1, '=');
      if (!p) {
        fprintf(stderr, "Missing = in configvar assignment\n");
        exit(1);
      }
      *p = 0;
      strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255);
      opt2[254] = 0;
      ++arg;
      if (!api.SetVariable(opt1, opt2)) {
        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
      }
    }
  }

  if (list_langs) {
    GenericVector<STRING> languages;
    api.GetAvailableLanguagesAsVector(&languages);
    fprintf(stderr, "List of available languages (%d):\n", languages.size());
    for (int index = 0; index < languages.size(); ++index) {
      STRING& string = languages[index];
      fprintf(stderr, "%s\n", string.string());
    }
    api.End();
    exit(0);
  }

  if (print_parameters) {
    FILE* fout = stdout;
    fprintf(stdout, "Tesseract parameters:\n");
    api.PrintVariables(fout);
    api.End();
    exit(0);
  }

  // We have 2 possible sources of pagesegmode: a config file and
  // the command line. For backwards compatibility reasons, the
  // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
  // default for this program is tesseract::PSM_AUTO. We will let
  // the config file take priority, so the command-line default
  // can take priority over the tesseract default, so we use the
  // value from the command line only if the retrieved mode
  // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
  // in any config file. Therefore the only way to force
  // tesseract::PSM_SINGLE_BLOCK is from the command line.
  // It would be simpler if we could set the value before Init,
  // but that doesn't work.
  if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
    api.SetPageSegMode(pagesegmode);

  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
          tesseract::TessBaseAPI::Version());

  FILE* fin = fopen(image, "rb");
  if (fin == NULL) {
    fprintf(stderr, "Cannot open input file: %s\n", image);
    exit(2);
  }
  fclose(fin);

  bool output_hocr = false;
  api.GetBoolVariable("tessedit_create_hocr", &output_hocr);
  bool output_box = false;
  api.GetBoolVariable("tessedit_create_boxfile", &output_box);
  FILE* fout = stdout;
  if (strcmp(output, "-") && strcmp(output, "stdout")) {
    STRING outfile = output;
    outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";
    fout = fopen(outfile.string(), "wb");
    if (fout == NULL) {
      fprintf(stderr, "Cannot create output file %s\n", outfile.string());
      exit(1);
    }
  }

  STRING text_out;
  if (!api.ProcessPages(image, NULL, 0, &text_out)) {
    fprintf(stderr, "Error during processing.\n");
    if (fout != stdout)
      fclose(fout);
    exit(1);
  }
  fwrite(text_out.string(), 1, text_out.length(), fout);
  if (fout != stdout)
    fclose(fout);
  else
    clearerr(fout);
  return 0;                      // Normal exit
}
// Accumulates the errors from the classifier results on a single sample.
// Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred.
// boosting_mode selects the type of error to be used for boosting and the
// is_error_ member of sample is set according to whether the required type
// of error occurred. The font_table provides access to font properties
// for error counting and shape_table is used to understand the relationship
// between unichar_ids and shape_ids in the results.
bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
                                    const UnicityTable<FontInfo>& font_table,
                                    const ShapeTable& shape_table,
                                    const GenericVector<ShapeRating>& results,
                                    TrainingSample* sample) {
  int num_results = results.size();
  int res_index = 0;
  bool debug_it = false;
  int font_id = sample->font_id();
  int unichar_id = sample->class_id();
  sample->set_is_error(false);
  if (num_results == 0) {
    // Reject. We count rejects as a separate category, but still mark the
    // sample as an error in case any training module wants to use that to
    // improve the classifier.
    sample->set_is_error(true);
    ++font_counts_[font_id].n[CT_REJECT];
  } else if (shape_table.GetShape(results[0].shape_id).
                 ContainsUnicharAndFont(unichar_id, font_id)) {
    ++font_counts_[font_id].n[CT_SHAPE_TOP_CORRECT];
    // Unichar and font OK, but count if multiple unichars.
    if (shape_table.GetShape(results[0].shape_id).size() > 1)
      ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
  } else {
    // This is a top shape error.
    ++font_counts_[font_id].n[CT_SHAPE_TOP_ERR];
    // Check to see if any font in the top choice has attributes that match.
    bool attributes_match = false;
    uinT32 font_props = font_table.get(font_id).properties;
    const Shape& shape = shape_table.GetShape(results[0].shape_id);
    for (int c = 0; c < shape.size() && !attributes_match; ++c) {
      for (int f = 0; f < shape[c].font_ids.size(); ++f) {
        if (font_table.get(shape[c].font_ids[f]).properties == font_props) {
          attributes_match = true;
          break;
        }
      }
    }
    // TODO(rays) It is easy to add counters for individual font attributes
    // here if we want them.
    if (!attributes_match)
      ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
    if (boosting_mode == CT_SHAPE_TOP_ERR) sample->set_is_error(true);
    // Find rank of correct unichar answer. (Ignoring the font.)
    while (res_index < num_results &&
           !shape_table.GetShape(results[res_index].shape_id).
                ContainsUnichar(unichar_id)) {
      ++res_index;
    }
    if (res_index == 0) {
      // Unichar OK, but count if multiple unichars.
      if (shape_table.GetShape(results[res_index].shape_id).size() > 1) {
        ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
      }
    } else {
      // Count maps from unichar id to shape id.
      if (num_results > 0)
        ++unichar_counts_(unichar_id, results[0].shape_id);
      // This is a unichar error.
      ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
      if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true);
      if (res_index >= MIN(2, num_results)) {
        // It is also a 2nd choice unichar error.
        ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
        if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true);
      }
      if (res_index >= num_results) {
        // It is also a top-n choice unichar error.
        ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
        if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true);
        debug_it = debug;
      }
    }
  }
  // Compute mean number of return values and mean rank of correct answer.
  font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
  font_counts_[font_id].n[CT_RANK] += res_index;
  // If it was an error for boosting then sum the weight.
  if (sample->is_error()) {
    scaled_error_ += sample->weight();
  }
  if (debug_it) {
    tprintf("%d results for char %s font %d :", num_results,
            shape_table.unicharset().id_to_unichar(unichar_id), font_id);
    for (int i = 0; i < num_results; ++i) {
      tprintf(" %.3f/%.3f:%s", results[i].rating, results[i].font,
              shape_table.DebugStr(results[i].shape_id).string());
    }
    tprintf("\n");
    return true;
  }
  return false;
}
void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set,
                                      TFile *ambig_file,
                                      int debug_level,
                                      bool use_ambigs_for_adaption,
                                      UNICHARSET *unicharset) {
  int i, j;
  UnicharIdVector *adaption_ambigs_entry;
  if (debug_level) tprintf("Reading ambiguities\n");

  int test_ambig_part_size;
  int replacement_ambig_part_size;
  // The space for buffer is allocated on the heap to avoid
  // GCC frame size warning.
  const int kBufferSize = 10 + 2 * kMaxAmbigStringSize;
  char *buffer = new char[kBufferSize];
  char replacement_string[kMaxAmbigStringSize];
  UNICHAR_ID test_unichar_ids[MAX_AMBIG_SIZE + 1];
  int line_num = 0;
  int type = NOT_AMBIG;

  // Determine the version of the ambigs file.
  int version = 0;
  ASSERT_HOST(ambig_file->FGets(buffer, kBufferSize) != NULL &&
              strlen(buffer) > 0);
  if (*buffer == 'v') {
    version = static_cast<int>(strtol(buffer+1, NULL, 10));
    ++line_num;
  } else {
    ambig_file->Rewind();
  }
  while (ambig_file->FGets(buffer, kBufferSize) != NULL) {
    chomp_string(buffer);
    if (debug_level > 2) tprintf("read line %s\n", buffer);
    ++line_num;
    if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set,
                            buffer, &test_ambig_part_size, test_unichar_ids,
                            &replacement_ambig_part_size,
                            replacement_string, &type)) continue;
    // Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST.
    AmbigSpec *ambig_spec = new AmbigSpec();
    if (!InsertIntoTable((type == REPLACE_AMBIG) ? replace_ambigs_
                                                 : dang_ambigs_,
                         test_ambig_part_size, test_unichar_ids,
                         replacement_ambig_part_size, replacement_string,
                         type, ambig_spec, unicharset))
      continue;

    // Update one_to_one_definite_ambigs_.
    if (test_ambig_part_size == 1 && replacement_ambig_part_size == 1 &&
        type == DEFINITE_AMBIG) {
      if (one_to_one_definite_ambigs_[test_unichar_ids[0]] == NULL) {
        one_to_one_definite_ambigs_[test_unichar_ids[0]] =
            new UnicharIdVector();
      }
      one_to_one_definite_ambigs_[test_unichar_ids[0]]->push_back(
          ambig_spec->correct_ngram_id);
    }
    // Update ambigs_for_adaption_.
    if (use_ambigs_for_adaption) {
      GenericVector<UNICHAR_ID> encoding;
      // Silently ignore invalid strings, as before, so it is safe to use a
      // universal ambigs file.
      if (unicharset->encode_string(replacement_string, true, &encoding,
                                    NULL, NULL)) {
        for (i = 0; i < test_ambig_part_size; ++i) {
          if (ambigs_for_adaption_[test_unichar_ids[i]] == NULL) {
            ambigs_for_adaption_[test_unichar_ids[i]] = new UnicharIdVector();
          }
          adaption_ambigs_entry = ambigs_for_adaption_[test_unichar_ids[i]];
          for (int r = 0; r < encoding.size(); ++r) {
            UNICHAR_ID id_to_insert = encoding[r];
            ASSERT_HOST(id_to_insert != INVALID_UNICHAR_ID);
            // Add the new unichar id to adaption_ambigs_entry (only if the
            // vector does not already contain it) keeping it in sorted order.
            for (j = 0; j < adaption_ambigs_entry->size() &&
                 (*adaption_ambigs_entry)[j] > id_to_insert; ++j);
            if (j < adaption_ambigs_entry->size()) {
              if ((*adaption_ambigs_entry)[j] != id_to_insert) {
                adaption_ambigs_entry->insert(id_to_insert, j);
              }
            } else {
              adaption_ambigs_entry->push_back(id_to_insert);
            }
          }
        }
      }
    }
  }
  delete[] buffer;

  // Fill in reverse_ambigs_for_adaption from ambigs_for_adaption vector.
  if (use_ambigs_for_adaption) {
    for (i = 0; i < ambigs_for_adaption_.size(); ++i) {
      adaption_ambigs_entry = ambigs_for_adaption_[i];
      if (adaption_ambigs_entry == NULL) continue;
      for (j = 0; j < adaption_ambigs_entry->size(); ++j) {
        UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j];
        if (reverse_ambigs_for_adaption_[ambig_id] == NULL) {
          reverse_ambigs_for_adaption_[ambig_id] = new UnicharIdVector();
        }
        reverse_ambigs_for_adaption_[ambig_id]->push_back(i);
      }
    }
  }

  // Print what was read from the input file.
  if (debug_level > 1) {
    for (int tbl = 0; tbl < 2; ++tbl) {
      const UnicharAmbigsVector &print_table =
          (tbl == 0) ? replace_ambigs_ : dang_ambigs_;
      for (i = 0; i < print_table.size(); ++i) {
        AmbigSpec_LIST *lst = print_table[i];
        if (lst == NULL) continue;
        if (!lst->empty()) {
          tprintf("%s Ambiguities for %s:\n",
                  (tbl == 0) ? "Replaceable" : "Dangerous",
                  unicharset->debug_str(i).string());
        }
        AmbigSpec_IT lst_it(lst);
        for (lst_it.mark_cycle_pt(); !lst_it.cycled_list(); lst_it.forward()) {
          AmbigSpec *ambig_spec = lst_it.data();
          tprintf("wrong_ngram:");
          UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, *unicharset);
          tprintf("correct_fragments:");
          UnicharIdArrayUtils::print(ambig_spec->correct_fragments,
                                     *unicharset);
        }
      }
    }
    if (use_ambigs_for_adaption) {
      for (int vec_id = 0; vec_id < 2; ++vec_id) {
        const GenericVector<UnicharIdVector *> &vec = (vec_id == 0) ?
            ambigs_for_adaption_ : reverse_ambigs_for_adaption_;
        for (i = 0; i < vec.size(); ++i) {
          adaption_ambigs_entry = vec[i];
          if (adaption_ambigs_entry != NULL) {
            tprintf("%sAmbigs for adaption for %s:\n",
                    (vec_id == 0) ? "" : "Reverse ",
                    unicharset->debug_str(i).string());
            for (j = 0; j < adaption_ambigs_entry->size(); ++j) {
              tprintf("%s ", unicharset->debug_str(
                  (*adaption_ambigs_entry)[j]).string());
            }
            tprintf("\n");
          }
        }
      }
    }
  }
}
bool UnicharAmbigs::ParseAmbiguityLine(
    int line_num, int version, int debug_level, const UNICHARSET &unicharset,
    char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids,
    int *replacement_ambig_part_size, char *replacement_string, int *type) {
  if (version > 1) {
    // Simpler format is just wrong-string correct-string type\n.
    STRING input(buffer);
    GenericVector<STRING> fields;
    input.split(' ', &fields);
    if (fields.size() != 3) {
      if (debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
    // Encode wrong-string.
    GenericVector<UNICHAR_ID> unichars;
    if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL,
                                  NULL)) {
      return false;
    }
    *test_ambig_part_size = unichars.size();
    if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
      if (debug_level)
        tprintf("Too many unichars in ambiguity on line %d\n", line_num);
      return false;
    }
    // Copy encoded string to output.
    for (int i = 0; i < unichars.size(); ++i)
      test_unichar_ids[i] = unichars[i];
    test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
    // Encode replacement-string to check validity.
    if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL,
                                  NULL)) {
      return false;
    }
    *replacement_ambig_part_size = unichars.size();
    if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
      if (debug_level)
        tprintf("Too many unichars in ambiguity on line %d\n", line_num);
      return false;
    }
    if (sscanf(fields[2].string(), "%d", type) != 1) {
      if (debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
    snprintf(replacement_string, kMaxAmbigStringSize, "%s",
             fields[1].string());
    return true;
  }
  int i;
  char *token;
  char *next_token;
  if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
      !sscanf(token, "%d", test_ambig_part_size) ||
      *test_ambig_part_size <= 0) {
    if (debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
    if (debug_level)
      tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  for (i = 0; i < *test_ambig_part_size; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    if (!unicharset.contains_unichar(token)) {
      if (debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
    test_unichar_ids[i] = unicharset.unichar_to_id(token);
  }
  test_unichar_ids[i] = INVALID_UNICHAR_ID;

  if (i != *test_ambig_part_size ||
      !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
      !sscanf(token, "%d", replacement_ambig_part_size) ||
      *replacement_ambig_part_size <= 0) {
    if (debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
    if (debug_level)
      tprintf("Too many unichars in ambiguity on line %d\n", line_num);
    return false;
  }
  replacement_string[0] = '\0';
  for (i = 0; i < *replacement_ambig_part_size; ++i) {
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break;
    strcat(replacement_string, token);
    if (!unicharset.contains_unichar(token)) {
      if (debug_level) tprintf(kIllegalUnicharMsg, token);
      break;
    }
  }
  if (i != *replacement_ambig_part_size) {
    if (debug_level) tprintf(kIllegalMsg, line_num);
    return false;
  }
  if (version > 0) {
    // The next field being true indicates that the ambiguity should
    // always be substituted (e.g. '' should always be changed to ").
    // For such "certain" n -> m ambigs tesseract will insert character
    // fragments for the n pieces in the unicharset. AmbigsFound()
    // will then replace the incorrect ngram with the character
    // fragments of the correct character (or ngram if m > 1).
    // Note that if m > 1, an ngram will be inserted into the
    // modified word, not the individual unigrams. Tesseract
    // has limited support for ngram unichars (e.g. dawg permuter).
    if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) ||
        !sscanf(token, "%d", type)) {
      if (debug_level) tprintf(kIllegalMsg, line_num);
      return false;
    }
  }
  return true;
}
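// A minimal stand-alone sketch of the version-2 line format handled above:
// each line is just "wrong-string correct-string type". This is not the
// Tesseract parser (no unicharset encoding or MAX_AMBIG_SIZE checks); the
// struct and the sample line below are hypothetical.
#include <iostream>
#include <sstream>
#include <string>

struct AmbigLine {
  std::string wrong;
  std::string correct;
  int type = 0;
};

static bool ParseV2AmbigLine(const std::string& line, AmbigLine* out) {
  std::istringstream in(line);
  return static_cast<bool>(in >> out->wrong >> out->correct >> out->type);
}

int main() {
  AmbigLine a;
  if (ParseV2AmbigLine("rn m 1", &a))
    std::cout << a.wrong << " -> " << a.correct << " (type " << a.type << ")\n";
  return 0;
}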
// Gets the properties for a grapheme string, combining properties for
// multiple characters in a meaningful way where possible.
// Returns false if no valid match was found in the unicharset.
// NOTE that script_id, mirror, and other_case refer to this unicharset on
// return and will need translation if the target unicharset is different.
bool UNICHARSET::GetStrProperties(const char* utf8_str,
                                  UNICHAR_PROPERTIES* props) const {
  props->Init();
  props->SetRangesEmpty();
  props->min_advance = 0;
  props->max_advance = 0;
  int total_unicodes = 0;
  GenericVector<UNICHAR_ID> encoding;
  if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
    return false;  // Some part was invalid.
  for (int i = 0; i < encoding.size(); ++i) {
    int id = encoding[i];
    const UNICHAR_PROPERTIES& src_props = unichars[id].properties;
    // Logical OR all the bools.
    if (src_props.isalpha) props->isalpha = true;
    if (src_props.islower) props->islower = true;
    if (src_props.isupper) props->isupper = true;
    if (src_props.isdigit) props->isdigit = true;
    if (src_props.ispunctuation) props->ispunctuation = true;
    if (src_props.isngram) props->isngram = true;
    if (src_props.enabled) props->enabled = true;
    // Min/max the tops/bottoms.
    UpdateRange(src_props.min_bottom, &props->min_bottom, &props->max_bottom);
    UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
    UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
    UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
    int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
                              -MAX_INT16, MAX_INT16);
    if (total_unicodes == 0 || bearing < props->min_bearing)
      props->min_bearing = bearing;
    bearing = ClipToRange(props->max_advance + src_props.max_bearing,
                          -MAX_INT16, MAX_INT16);
    if (total_unicodes == 0 || bearing < props->max_bearing)
      props->max_bearing = bearing;
    props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
                                     -MAX_INT16, MAX_INT16);
    props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
                                     -MAX_INT16, MAX_INT16);
    // With a single width, just use the widths stored in the unicharset.
    props->min_width = src_props.min_width;
    props->max_width = src_props.max_width;
    // Use the first script id, other_case, mirror, direction.
    // Note that these will need translation, except direction.
    if (total_unicodes == 0) {
      props->script_id = src_props.script_id;
      props->other_case = src_props.other_case;
      props->mirror = src_props.mirror;
      props->direction = src_props.direction;
    }
    // The normed string for the compound character is the concatenation of
    // the normed versions of the individual characters.
    props->normed += src_props.normed;
    ++total_unicodes;
  }
  if (total_unicodes > 1) {
    // Estimate the total widths from the advance - bearing.
    props->min_width = ClipToRange(props->min_advance - props->max_bearing,
                                   -MAX_INT16, MAX_INT16);
    props->max_width = ClipToRange(props->max_advance - props->min_bearing,
                                   -MAX_INT16, MAX_INT16);
  }
  return total_unicodes > 0;
}
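// Simplified stand-alone sketch (hypothetical struct, not UNICHAR_PROPERTIES)
// of how per-character metrics combine above: booleans are ORed, top/bottom
// ranges are widened with min/max, and advances accumulate along the string.
#include <algorithm>
#include <cstdio>
#include <vector>

struct CharProps {
  bool isalpha;
  int min_top, max_top;
  int min_advance, max_advance;
};

static CharProps CombineProps(const std::vector<CharProps>& chars) {
  CharProps out = {false, 0, 0, 0, 0};
  for (size_t i = 0; i < chars.size(); ++i) {
    const CharProps& c = chars[i];
    out.isalpha = out.isalpha || c.isalpha;
    out.min_top = (i == 0) ? c.min_top : std::min(out.min_top, c.min_top);
    out.max_top = (i == 0) ? c.max_top : std::max(out.max_top, c.max_top);
    out.min_advance += c.min_advance;  // advances add up character by character
    out.max_advance += c.max_advance;
  }
  return out;
}

int main() {
  std::vector<CharProps> chars = {{true, 40, 60, 10, 12}, {true, 45, 55, 8, 9}};
  CharProps g = CombineProps(chars);
  std::printf("alpha=%d top=[%d,%d] advance=[%d,%d]\n",
              g.isalpha, g.min_top, g.max_top, g.min_advance, g.max_advance);
  return 0;
}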
// Accumulates the errors from the classifier results on a single sample.
// Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred.
// boosting_mode selects the type of error to be used for boosting and the
// is_error_ member of sample is set according to whether the required type
// of error occurred. The font_table provides access to font properties
// for error counting.
bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
                                    const FontInfoTable& font_table,
                                    const GenericVector<UnicharRating>& results,
                                    TrainingSample* sample) {
  int num_results = results.size();
  int answer_actual_rank = -1;
  int font_id = sample->font_id();
  int unichar_id = sample->class_id();
  sample->set_is_error(false);
  if (num_results == 0) {
    // Reject. We count rejects as a separate category, but still mark the
    // sample as an error in case any training module wants to use that to
    // improve the classifier.
    sample->set_is_error(true);
    ++font_counts_[font_id].n[CT_REJECT];
  } else {
    // Find rank of correct unichar answer, using rating_epsilon_ to allow
    // different answers to score as equal. (Ignoring the font.)
    int epsilon_rank = 0;
    int answer_epsilon_rank = -1;
    int num_top_answers = 0;
    double prev_rating = results[0].rating;
    bool joined = false;
    bool broken = false;
    int res_index = 0;
    while (res_index < num_results) {
      if (results[res_index].rating < prev_rating - rating_epsilon_) {
        ++epsilon_rank;
        prev_rating = results[res_index].rating;
      }
      if (results[res_index].unichar_id == unichar_id &&
          answer_epsilon_rank < 0) {
        answer_epsilon_rank = epsilon_rank;
        answer_actual_rank = res_index;
      }
      if (results[res_index].unichar_id == UNICHAR_JOINED &&
          unicharset_.has_special_codes())
        joined = true;
      else if (results[res_index].unichar_id == UNICHAR_BROKEN &&
               unicharset_.has_special_codes())
        broken = true;
      else if (epsilon_rank == 0)
        ++num_top_answers;
      ++res_index;
    }
    if (answer_actual_rank != 0) {
      // Correct result is not absolute top.
      ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];
      if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) sample->set_is_error(true);
    }
    if (answer_epsilon_rank == 0) {
      ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];
      // Unichar OK, but count if multiple unichars.
      if (num_top_answers > 1) {
        ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
        ++multi_unichar_counts_[unichar_id];
      }
      // Check to see if any font in the top choice has attributes that match.
      // TODO(rays) It is easy to add counters for individual font attributes
      // here if we want them.
      if (font_table.SetContainsFontProperties(
          font_id, results[answer_actual_rank].fonts)) {
        // Font attributes were matched.
        // Check for multiple properties.
        if (font_table.SetContainsMultipleFontProperties(
            results[answer_actual_rank].fonts))
          ++font_counts_[font_id].n[CT_OK_MULTI_FONT];
      } else {
        // Font attributes weren't matched.
        ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
      }
    } else {
      // This is a top unichar error.
      ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
      if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true);
      // Count maps from unichar id to wrong unichar id.
      ++unichar_counts_(unichar_id, results[0].unichar_id);
      if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {
        // It is also a 2nd choice unichar error.
        ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
        if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true);
      }
      if (answer_epsilon_rank < 0) {
        // It is also a top-n choice unichar error.
        ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
        if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true);
        answer_epsilon_rank = epsilon_rank;
      }
    }
    // Compute mean number of return values and mean rank of correct answer.
    font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
    font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;
    if (joined) ++font_counts_[font_id].n[CT_OK_JOINED];
    if (broken) ++font_counts_[font_id].n[CT_OK_BROKEN];
  }
  // If it was an error for boosting then sum the weight.
  if (sample->is_error()) {
    scaled_error_ += sample->weight();
    if (debug) {
      tprintf("%d results for char %s font %d :", num_results,
              unicharset_.id_to_unichar(unichar_id), font_id);
      for (int i = 0; i < num_results; ++i) {
        tprintf(" %.3f : %s\n", results[i].rating,
                unicharset_.id_to_unichar(results[i].unichar_id));
      }
      return true;
    }
    int percent = 0;
    if (num_results > 0) percent = IntCastRounded(results[0].rating * 100);
    bad_score_hist_.add(percent, 1);
  } else {
    int percent = 0;
    if (answer_actual_rank >= 0)
      percent = IntCastRounded(results[answer_actual_rank].rating * 100);
    ok_score_hist_.add(percent, 1);
  }
  return false;
}
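// Stand-alone illustration (plain structs, not UnicharRating) of the
// epsilon-rank idea used above: results whose ratings differ by less than
// rating_epsilon from the current best are treated as tied for the same rank.
// The data and names here are hypothetical.
#include <cstdio>
#include <vector>

struct Result { int unichar_id; double rating; };  // sorted best-first

// Returns the epsilon-rank of `truth`, or -1 if it is not in the results.
static int EpsilonRank(const std::vector<Result>& results, int truth,
                       double rating_epsilon) {
  if (results.empty()) return -1;
  int rank = 0;
  double prev_rating = results[0].rating;
  for (size_t i = 0; i < results.size(); ++i) {
    if (results[i].rating < prev_rating - rating_epsilon) {
      ++rank;
      prev_rating = results[i].rating;
    }
    if (results[i].unichar_id == truth) return rank;
  }
  return -1;
}

int main() {
  std::vector<Result> results = {{12, 0.90}, {7, 0.89}, {3, 0.60}};
  // 7 ties with the top answer within epsilon, so its rank is 0.
  std::printf("rank of 7 = %d\n", EpsilonRank(results, 7, 0.05));
  return 0;
}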
/**********************************************************************
 * select_blob_to_split
 *
 * These are the results of the last classification. Find a likely
 * place to apply splits. If none, return -1.
 **********************************************************************/
int Wordrec::select_blob_to_split(
    const GenericVector<BLOB_CHOICE*>& blob_choices,
    float rating_ceiling, bool split_next_to_fragment) {
  BLOB_CHOICE *blob_choice;
  int x;
  float worst = -MAX_FLOAT32;
  int worst_index = -1;
  float worst_near_fragment = -MAX_FLOAT32;
  int worst_index_near_fragment = -1;
  const CHAR_FRAGMENT **fragments = NULL;

  if (chop_debug) {
    if (rating_ceiling < MAX_FLOAT32)
      tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
    else
      tprintf("rating_ceiling = No Limit\n");
  }

  if (split_next_to_fragment && blob_choices.size() > 0) {
    fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
    if (blob_choices[0] != NULL) {
      fragments[0] = getDict().getUnicharset().get_fragment(
          blob_choices[0]->unichar_id());
    } else {
      fragments[0] = NULL;
    }
  }

  for (x = 0; x < blob_choices.size(); ++x) {
    if (blob_choices[x] == NULL) {
      if (fragments != NULL) {
        delete[] fragments;
      }
      return x;
    } else {
      blob_choice = blob_choices[x];
      // Populate fragments for the following position.
      if (split_next_to_fragment && x+1 < blob_choices.size()) {
        if (blob_choices[x + 1] != NULL) {
          fragments[x + 1] = getDict().getUnicharset().get_fragment(
              blob_choices[x + 1]->unichar_id());
        } else {
          fragments[x + 1] = NULL;
        }
      }
      if (blob_choice->rating() < rating_ceiling &&
          blob_choice->certainty() < tessedit_certainty_threshold) {
        // Update worst and worst_index.
        if (blob_choice->rating() > worst) {
          worst_index = x;
          worst = blob_choice->rating();
        }
        if (split_next_to_fragment) {
          // Update worst_near_fragment and worst_index_near_fragment.
          bool expand_following_fragment =
              (x + 1 < blob_choices.size() &&
               fragments[x+1] != NULL && !fragments[x+1]->is_beginning());
          bool expand_preceding_fragment =
              (x > 0 && fragments[x-1] != NULL && !fragments[x-1]->is_ending());
          if ((expand_following_fragment || expand_preceding_fragment) &&
              blob_choice->rating() > worst_near_fragment) {
            worst_index_near_fragment = x;
            worst_near_fragment = blob_choice->rating();
            if (chop_debug) {
              tprintf("worst_index_near_fragment=%d"
                      " expand_following_fragment=%d"
                      " expand_preceding_fragment=%d\n",
                      worst_index_near_fragment,
                      expand_following_fragment,
                      expand_preceding_fragment);
            }
          }
        }
      }
    }
  }
  if (fragments != NULL) {
    delete[] fragments;
  }
  // TODO(daria): maybe a threshold of badness for
  // worst_near_fragment would be useful.
  return worst_index_near_fragment != -1 ?
      worst_index_near_fragment : worst_index;
}
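// A reduced stand-alone sketch (plain floats, not BLOB_CHOICE) of the core
// selection above: pick the index whose rating is worst (largest) while still
// below the ceiling. The certainty test and the fragment-adjacency refinement
// are deliberately left out; the data below is hypothetical.
#include <cstdio>
#include <vector>

static int WorstUnderCeiling(const std::vector<float>& ratings, float ceiling) {
  int worst_index = -1;
  float worst = -1e30f;
  for (int i = 0; i < static_cast<int>(ratings.size()); ++i) {
    if (ratings[i] < ceiling && ratings[i] > worst) {
      worst = ratings[i];
      worst_index = i;
    }
  }
  return worst_index;  // -1 if nothing qualifies
}

int main() {
  std::vector<float> ratings = {1.5f, 7.2f, 3.9f};
  std::printf("split candidate = %d\n", WorstUnderCeiling(ratings, 10.0f));
  return 0;
}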
//-----------------------------------------------------------------------------
void FunctionSpace::interpolate(GenericVector& expansion_coefficients,
                                const GenericFunction& v) const
{
  dolfin_assert(_mesh);
  dolfin_assert(_element);
  dolfin_assert(_dofmap);

  // Check that function ranks match
  if (_element->value_rank() != v.value_rank())
  {
    dolfin_error("FunctionSpace.cpp",
                 "interpolate function into function space",
                 "Rank of function (%d) does not match rank of function space (%d)",
                 v.value_rank(), element()->value_rank());
  }

  // Check that function dims match
  for (std::size_t i = 0; i < _element->value_rank(); ++i)
  {
    if (_element->value_dimension(i) != v.value_dimension(i))
    {
      dolfin_error("FunctionSpace.cpp",
                   "interpolate function into function space",
                   "Dimension %d of function (%d) does not match dimension %d of function space (%d)",
                   i, v.value_dimension(i), i, element()->value_dimension(i));
    }
  }

  // Initialize vector of expansion coefficients
  if (expansion_coefficients.size() != _dofmap->global_dimension())
  {
    dolfin_error("FunctionSpace.cpp",
                 "interpolate function into function space",
                 "Wrong size of vector");
  }
  expansion_coefficients.zero();

  // Initialize local arrays
  std::vector<double> cell_coefficients(_dofmap->max_element_dofs());

  // Iterate over mesh and interpolate on each cell
  ufc::cell ufc_cell;
  std::vector<double> vertex_coordinates;
  for (CellIterator cell(*_mesh); !cell.end(); ++cell)
  {
    // Update to current cell
    cell->get_vertex_coordinates(vertex_coordinates);
    cell->get_cell_data(ufc_cell);

    // Restrict function to cell
    v.restrict(cell_coefficients.data(), *_element, *cell,
               vertex_coordinates.data(), ufc_cell);

    // Tabulate dofs
    const ArrayView<const dolfin::la_index> cell_dofs
      = _dofmap->cell_dofs(cell->index());

    // Copy dofs to vector
    expansion_coefficients.set_local(cell_coefficients.data(),
                                     _dofmap->num_element_dofs(cell->index()),
                                     cell_dofs.data());
  }

  // Finalise changes
  expansion_coefficients.apply("insert");
}
/// Recursive helper to find a match to the target_text (from text_index
/// position) in the choices (from choices_pos position).
/// @param choices is an array of GenericVectors, of length choices_length,
/// with each element representing a starting position in the word, and the
/// #GenericVector holding classification results for a sequence of consecutive
/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
/// @param choices_pos
/// @param choices_length
/// @param target_text
/// @param text_index
/// @param rating
/// @param segmentation
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
                              int choices_pos, int choices_length,
                              const GenericVector<UNICHAR_ID>& target_text,
                              int text_index,
                              float rating, GenericVector<int>* segmentation,
                              float* best_rating,
                              GenericVector<int>* best_segmentation) {
  const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs();
  for (int length = 1; length <= choices[choices_pos].size(); ++length) {
    // Rating of matching choice or worst choice if no match.
    float choice_rating = 0.0f;
    // Find the corresponding best BLOB_CHOICE.
    BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]);
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      choice_rating = choice->rating();
      UNICHAR_ID class_id = choice->unichar_id();
      if (class_id == target_text[text_index]) {
        break;
      }
      // Search ambigs table.
      if (class_id < table.size() && table[class_id] != NULL) {
        AmbigSpec_IT spec_it(table[class_id]);
        for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();
             spec_it.forward()) {
          const AmbigSpec *ambig_spec = spec_it.data();
          // We'll only do 1-1.
          if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
              ambig_spec->correct_ngram_id == target_text[text_index])
            break;
        }
        if (!spec_it.cycled_list()) break;  // Found an ambig.
      }
    }
    if (choice_it.cycled_list()) continue;  // No match.
    segmentation->push_back(length);
    if (choices_pos + length == choices_length &&
        text_index + 1 == target_text.size()) {
      // This is a complete match. If the rating is good record a new best.
      if (applybox_debug > 2) {
        tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n",
                rating + choice_rating, *best_rating, segmentation->size(),
                best_segmentation->size());
      }
      if (best_segmentation->empty() || rating + choice_rating < *best_rating) {
        *best_segmentation = *segmentation;
        *best_rating = rating + choice_rating;
      }
    } else if (choices_pos + length < choices_length &&
               text_index + 1 < target_text.size()) {
      if (applybox_debug > 3) {
        tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n",
                target_text[text_index],
                unicharset.id_to_unichar(target_text[text_index]),
                choice_it.data()->unichar_id() == target_text[text_index]
                    ? "Match" : "Ambig",
                choices_pos, length);
      }
      SearchForText(choices, choices_pos + length, choices_length, target_text,
                    text_index + 1, rating + choice_rating, segmentation,
                    best_rating, best_segmentation);
      if (applybox_debug > 3) {
        tprintf("End recursion for %d=%s\n", target_text[text_index],
                unicharset.id_to_unichar(target_text[text_index]));
      }
    }
    segmentation->truncate(segmentation->size() - 1);
  }
}
void ResultIterator::CalculateTextlineOrder(
    bool paragraph_is_ltr,
    const GenericVector<StrongScriptDirection> &word_dirs,
    GenericVectorEqEq<int> *reading_order) {
  reading_order->truncate(0);
  if (word_dirs.size() == 0) return;

  // Take all of the runs of minor direction words and insert them
  // in reverse order.
  int minor_direction, major_direction, major_step, start, end;
  if (paragraph_is_ltr) {
    start = 0;
    end = word_dirs.size();
    major_step = 1;
    major_direction = DIR_LEFT_TO_RIGHT;
    minor_direction = DIR_RIGHT_TO_LEFT;
  } else {
    start = word_dirs.size() - 1;
    end = -1;
    major_step = -1;
    major_direction = DIR_RIGHT_TO_LEFT;
    minor_direction = DIR_LEFT_TO_RIGHT;
    // Special rule: if there are neutral words at the right most side
    // of a line adjacent to a left-to-right word in the middle of the
    // line, we interpret the end of the line as a single LTR sequence.
    if (word_dirs[start] == DIR_NEUTRAL) {
      int neutral_end = start;
      while (neutral_end > 0 && word_dirs[neutral_end] == DIR_NEUTRAL) {
        neutral_end--;
      }
      if (neutral_end >= 0 && word_dirs[neutral_end] == DIR_LEFT_TO_RIGHT) {
        // LTR followed by neutrals.
        // Scan for the beginning of the minor left-to-right run.
        int left = neutral_end;
        for (int i = left; i >= 0 && word_dirs[i] != DIR_RIGHT_TO_LEFT; i--) {
          if (word_dirs[i] == DIR_LEFT_TO_RIGHT) left = i;
        }
        reading_order->push_back(kMinorRunStart);
        for (int i = left; i < word_dirs.size(); i++) {
          reading_order->push_back(i);
          if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord);
        }
        reading_order->push_back(kMinorRunEnd);
        start = left - 1;
      }
    }
  }
  for (int i = start; i != end;) {
    if (word_dirs[i] == minor_direction) {
      int j = i;
      while (j != end && word_dirs[j] != major_direction)
        j += major_step;
      if (j == end) j -= major_step;
      while (j != i && word_dirs[j] != minor_direction)
        j -= major_step;
      // [j..i] is a minor direction run.
      reading_order->push_back(kMinorRunStart);
      for (int k = j; k != i; k -= major_step) {
        reading_order->push_back(k);
      }
      reading_order->push_back(i);
      reading_order->push_back(kMinorRunEnd);
      i = j + major_step;
    } else {
      reading_order->push_back(i);
      if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord);
      i += major_step;
    }
  }
}
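// Minimal stand-alone sketch (plain ints, not ResultIterator, and without the
// neutral-word handling or the kMinorRunStart/kMinorRunEnd markers) of the
// core move in CalculateTextlineOrder(): runs of minor-direction words are
// emitted in reverse order inside the major-direction sweep.
#include <cstdio>
#include <vector>

enum Dir { MAJOR, MINOR };

static std::vector<int> ReadingOrder(const std::vector<Dir>& dirs) {
  std::vector<int> order;
  int n = static_cast<int>(dirs.size());
  int i = 0;
  while (i < n) {
    if (dirs[i] == MINOR) {
      int j = i;
      while (j < n && dirs[j] == MINOR) ++j;                  // [i, j) is a minor run
      for (int k = j - 1; k >= i; --k) order.push_back(k);    // emit it reversed
      i = j;
    } else {
      order.push_back(i++);
    }
  }
  return order;
}

int main() {
  std::vector<Dir> dirs = {MAJOR, MINOR, MINOR, MAJOR};
  for (int w : ReadingOrder(dirs)) std::printf("%d ", w);  // prints: 0 2 1 3
  std::printf("\n");
  return 0;
}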
//-----------------------------------------------------------------------------
std::size_t PETScLUSolver::solve(GenericVector& x, const GenericVector& b,
                                 bool transpose)
{
  Timer timer("PETSc LU solver");

  dolfin_assert(_ksp);
  dolfin_assert(_matA);

  PetscErrorCode ierr;

  // Downcast matrix and vectors
  const PETScVector& _b = as_type<const PETScVector>(b);
  PETScVector& _x = as_type<PETScVector>(x);

  // Check dimensions
  if (_matA->size(0) != b.size())
  {
    dolfin_error("PETScLUSolver.cpp",
                 "solve linear system using PETSc LU solver",
                 "Cannot factorize non-square PETSc matrix");
  }

  // Initialize solution vector if required (make compatible with A in
  // parallel)
  if (x.empty())
    _matA->init_vector(x, 1);

  // Set PETSc operators (depends on factorization re-use options);
  //set_petsc_operators();

  // Write a pre-solve message
  pre_report(*_matA);

  // Get package used to solve system
  PC pc;
  ierr = KSPGetPC(_ksp, &pc);
  if (ierr != 0) petsc_error(ierr, __FILE__, "KSPGetPC");

  configure_ksp(_solver_package);

  // Set number of threads if using PaStiX
  if (strcmp(_solver_package, MATSOLVERPASTIX) == 0)
  {
    const std::size_t num_threads = parameters["num_threads"].is_set()
      ? parameters["num_threads"] : dolfin::parameters["num_threads"];
    PETScOptions::set("-mat_pastix_threadnbr", num_threads);
  }

  // Solve linear system
  const Vec b_petsc = _b.vec();
  Vec x_petsc = _x.vec();
  if (!transpose)
  {
    ierr = KSPSolve(_ksp, b_petsc, x_petsc);
    if (ierr != 0) petsc_error(ierr, __FILE__, "KSPSolve");
  }
  else
  {
    ierr = KSPSolveTranspose(_ksp, b_petsc, x_petsc);
    if (ierr != 0) petsc_error(ierr, __FILE__, "KSPSolveTranspose");
  }

  // Update ghost values following solve
  _x.update_ghost_values();

  return 1;
}
/**
 * @name go_deeper_dawg_fxn
 *
 * If the choice being composed so far could be a dictionary word
 * keep exploring choices.
 */
void Dict::go_deeper_dawg_fxn(
    const char *debug, const BLOB_CHOICE_LIST_VECTOR &char_choices,
    int char_choice_index, const CHAR_FRAGMENT_INFO *prev_char_frag_info,
    bool word_ending, WERD_CHOICE *word, float certainties[], float *limit,
    WERD_CHOICE *best_choice, int *attempts_left, void *void_more_args) {
  DawgArgs *more_args = static_cast<DawgArgs *>(void_more_args);
  word_ending = (char_choice_index == char_choices.size()-1);
  int word_index = word->length() - 1;
  if (best_choice->rating() < *limit) return;
  // Look up char in DAWG

  // If the current unichar is an ngram first try calling
  // letter_is_okay() for each unigram it contains separately.
  UNICHAR_ID orig_uch_id = word->unichar_id(word_index);
  bool checked_unigrams = false;
  if (getUnicharset().get_isngram(orig_uch_id)) {
    if (dawg_debug_level) {
      tprintf("checking unigrams in an ngram %s\n",
              getUnicharset().debug_str(orig_uch_id).string());
    }
    int num_unigrams = 0;
    word->remove_last_unichar_id();
    GenericVector<UNICHAR_ID> encoding;
    const char *ngram_str = getUnicharset().id_to_unichar(orig_uch_id);
    // Since the string came out of the unicharset, failure is impossible.
    ASSERT_HOST(getUnicharset().encode_string(ngram_str, true, &encoding,
                                              nullptr, nullptr));
    bool unigrams_ok = true;
    // Construct DawgArgs that reflect the current state.
    DawgPositionVector unigram_active_dawgs = *(more_args->active_dawgs);
    DawgPositionVector unigram_updated_dawgs;
    DawgArgs unigram_dawg_args(&unigram_active_dawgs, &unigram_updated_dawgs,
                               more_args->permuter);
    // Check unigrams in the ngram with letter_is_okay().
    for (int i = 0; unigrams_ok && i < encoding.size(); ++i) {
      UNICHAR_ID uch_id = encoding[i];
      ASSERT_HOST(uch_id != INVALID_UNICHAR_ID);
      ++num_unigrams;
      word->append_unichar_id(uch_id, 1, 0.0, 0.0);
      unigrams_ok = (this->*letter_is_okay_)(
          &unigram_dawg_args,
          word->unichar_id(word_index+num_unigrams-1),
          word_ending && i == encoding.size() - 1);
      (*unigram_dawg_args.active_dawgs) = *(unigram_dawg_args.updated_dawgs);
      if (dawg_debug_level) {
        tprintf("unigram %s is %s\n",
                getUnicharset().debug_str(uch_id).string(),
                unigrams_ok ? "OK" : "not OK");
      }
    }
    // Restore the word and copy the updated dawg state if needed.
    while (num_unigrams-- > 0) word->remove_last_unichar_id();
    word->append_unichar_id_space_allocated(orig_uch_id, 1, 0.0, 0.0);
    if (unigrams_ok) {
      checked_unigrams = true;
      more_args->permuter = unigram_dawg_args.permuter;
      *(more_args->updated_dawgs) = *(unigram_dawg_args.updated_dawgs);
    }
  }

  // Check which dawgs from the dawgs_ vector contain the word
  // up to and including the current unichar.
  if (checked_unigrams || (this->*letter_is_okay_)(
      more_args, word->unichar_id(word_index), word_ending)) {
    // Add a new word choice
    if (word_ending) {
      if (dawg_debug_level) {
        tprintf("found word = %s\n", word->debug_string().string());
      }
      if (strcmp(output_ambig_words_file.string(), "") != 0) {
        if (output_ambig_words_file_ == nullptr) {
          output_ambig_words_file_ =
              fopen(output_ambig_words_file.string(), "wb+");
          if (output_ambig_words_file_ == nullptr) {
            tprintf("Failed to open output_ambig_words_file %s\n",
                    output_ambig_words_file.string());
            exit(1);
          }
          STRING word_str;
          word->string_and_lengths(&word_str, nullptr);
          word_str += " ";
          fprintf(output_ambig_words_file_, "%s", word_str.string());
        }
        STRING word_str;
        word->string_and_lengths(&word_str, nullptr);
        word_str += " ";
        fprintf(output_ambig_words_file_, "%s", word_str.string());
      }
      WERD_CHOICE *adjusted_word = word;
      adjusted_word->set_permuter(more_args->permuter);
      update_best_choice(*adjusted_word, best_choice);
    } else {  // search the next letter
      // Make updated_* point to the next entries in the DawgPositionVector
      // arrays (that were originally created in dawg_permute_and_select).
      ++(more_args->updated_dawgs);
      // Make active_dawgs and constraints point to the updated ones.
      ++(more_args->active_dawgs);
      permute_choices(debug, char_choices, char_choice_index + 1,
                      prev_char_frag_info, word, certainties, limit,
                      best_choice, attempts_left, more_args);
      // Restore previous state to explore another letter in this position.
      --(more_args->updated_dawgs);
      --(more_args->active_dawgs);
    }
  } else {
    if (dawg_debug_level) {
      tprintf("last unichar not OK at index %d in %s\n",
              word_index, word->debug_string().string());
    }
  }
}
int main(int argc, char **argv) {
  if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
      (argc == 2 && strcmp(argv[1], "--version") == 0)) {
    char *versionStrP;
    fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
    versionStrP = getLeptonicaVersion();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);
    versionStrP = getImagelibVersions();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);
#ifdef USE_OPENCL
    cl_platform_id platform;
    cl_uint num_platforms;
    cl_device_id devices[2];
    cl_uint num_devices;
    char info[256];
    int i;

    fprintf(stderr, " OpenCL info:\n");
    clGetPlatformIDs(1, &platform, &num_platforms);
    fprintf(stderr, " Found %d platforms.\n", num_platforms);
    clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0);
    fprintf(stderr, " Platform name: %s.\n", info);
    clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0);
    fprintf(stderr, " Version: %s.\n", info);
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices);
    fprintf(stderr, " Found %d devices.\n", num_devices);
    for (i = 0; i < num_devices; ++i) {
      clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
      fprintf(stderr, " Device %d name: %s.\n", i+1, info);
    }
#endif
    exit(0);
  }

  // Make the order of args a bit more forgiving than it used to be.
  const char* lang = "eng";
  const char* image = NULL;
  const char* outputbase = NULL;
  const char* datapath = NULL;
  bool noocr = false;
  bool list_langs = false;
  bool print_parameters = false;
  GenericVector<STRING> vars_vec, vars_values;
  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;

  int arg = 1;
  while (arg < argc && (outputbase == NULL || argv[arg][0] == '-')) {
    if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
      lang = argv[arg + 1];
      ++arg;
    } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) {
      datapath = argv[arg + 1];
      ++arg;
    } else if (strcmp(argv[arg], "--user-words") == 0 && arg + 1 < argc) {
      vars_vec.push_back("user_words_file");
      vars_values.push_back(argv[arg + 1]);
      ++arg;
    } else if (strcmp(argv[arg], "--user-patterns") == 0 && arg + 1 < argc) {
      vars_vec.push_back("user_patterns_file");
      vars_values.push_back(argv[arg + 1]);
      ++arg;
    } else if (strcmp(argv[arg], "--list-langs") == 0) {
      noocr = true;
      list_langs = true;
    } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
      pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
      ++arg;
    } else if (strcmp(argv[arg], "--print-parameters") == 0) {
      noocr = true;
      print_parameters = true;
    } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
      // handled properly after api init
      ++arg;
    } else if (image == NULL) {
      image = argv[arg];
    } else if (outputbase == NULL) {
      outputbase = argv[arg];
    }
    ++arg;
  }

  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
    list_langs = true;
    noocr = true;
  }

  if (outputbase == NULL && noocr == false) {
    fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout "
            "[options...] [configfile...]\n\n", argv[0]);
    fprintf(stderr, "OCR options:\n");
    fprintf(stderr, " --tessdata-dir /path\tspecify the location of tessdata"
            " path\n");
    fprintf(stderr, " --user-words /path/to/file\tspecify the location of user"
            " words file\n");
    fprintf(stderr, " --user-patterns /path/to/file\tspecify the location of"
            " user patterns file\n");
    fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n");
    fprintf(stderr, " -c configvar=value\tset value for control parameter.\n"
            "\t\t\tMultiple -c arguments are allowed.\n");
    fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n");
    fprintf(stderr, "These options must occur before any configfile.\n\n");
    fprintf(stderr,
            "pagesegmode values are:\n"
            " 0 = Orientation and script detection (OSD) only.\n"
            " 1 = Automatic page segmentation with OSD.\n"
            " 2 = Automatic page segmentation, but no OSD, or OCR\n"
            " 3 = Fully automatic page segmentation, but no OSD. (Default)\n"
            " 4 = Assume a single column of text of variable sizes.\n"
            " 5 = Assume a single uniform block of vertically aligned text.\n"
            " 6 = Assume a single uniform block of text.\n"
            " 7 = Treat the image as a single text line.\n"
            " 8 = Treat the image as a single word.\n"
            " 9 = Treat the image as a single word in a circle.\n"
            " 10 = Treat the image as a single character.\n\n");
    fprintf(stderr, "Single options:\n");
    fprintf(stderr, " -v --version: version info\n");
    fprintf(stderr, " --list-langs: list available languages for tesseract "
            "engine. Can be used with --tessdata-dir.\n");
    fprintf(stderr, " --print-parameters: print tesseract parameters to the "
            "stdout.\n");
    exit(1);
  }

  if (outputbase != NULL && strcmp(outputbase, "-") &&
      strcmp(outputbase, "stdout")) {
    tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
            tesseract::TessBaseAPI::Version());
  }

  PERF_COUNT_START("Tesseract:main")
  tesseract::TessBaseAPI api;

  api.SetOutputName(outputbase);
  int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
                    &(argv[arg]), argc - arg, &vars_vec, &vars_values, false);
  if (rc) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
  }

  char opt1[255], opt2[255];
  for (arg = 0; arg < argc; arg++) {
    if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
      strncpy(opt1, argv[arg + 1], 255);
      char *p = strchr(opt1, '=');
      if (!p) {
        fprintf(stderr, "Missing = in configvar assignment\n");
        exit(1);
      }
      *p = 0;
      strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255);
      opt2[254] = 0;
      ++arg;

      if (!api.SetVariable(opt1, opt2)) {
        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
      }
    }
  }

  if (list_langs) {
    GenericVector<STRING> languages;
    api.GetAvailableLanguagesAsVector(&languages);
    fprintf(stderr, "List of available languages (%d):\n", languages.size());
    for (int index = 0; index < languages.size(); ++index) {
      STRING& string = languages[index];
      fprintf(stderr, "%s\n", string.string());
    }
    api.End();
    exit(0);
  }

  if (print_parameters) {
    FILE* fout = stdout;
    fprintf(stdout, "Tesseract parameters:\n");
    api.PrintVariables(fout);
    api.End();
    exit(0);
  }

  // We have 2 possible sources of pagesegmode: a config file and
  // the command line. For backwards compatibility reasons, the
  // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
  // default for this program is tesseract::PSM_AUTO. We will let
  // the config file take priority, so the command-line default
  // can take priority over the tesseract default, so we use the
  // value from the command line only if the retrieved mode
  // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
  // in any config file. Therefore the only way to force
  // tesseract::PSM_SINGLE_BLOCK is from the command line.
  // It would be simpler if we could set the value before Init,
  // but that doesn't work.
  if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
    api.SetPageSegMode(pagesegmode);

  if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
      pagesegmode == tesseract::PSM_OSD_ONLY) {
    int ret_val = 0;

    Pix* pixs = pixRead(image);
    if (!pixs) {
      fprintf(stderr, "Cannot open input file: %s\n", image);
      exit(2);
    }
    api.SetImage(pixs);

    if (pagesegmode == tesseract::PSM_OSD_ONLY) {
      OSResults osr;
      if (api.DetectOS(&osr)) {
        int orient = osr.best_result.orientation_id;
        int script_id = osr.get_best_script(orient);
        float orient_oco = osr.best_result.oconfidence;
        float orient_sco = osr.best_result.sconfidence;
        tprintf("Orientation: %d\nOrientation in degrees: %d\n"
                "Orientation confidence: %.2f\n"
                "Script: %d\nScript confidence: %.2f\n",
                orient, OrientationIdToValue(orient), orient_oco,
                script_id, orient_sco);
      } else {
        ret_val = 1;
      }
    } else {
      tesseract::Orientation orientation;
      tesseract::WritingDirection direction;
      tesseract::TextlineOrder order;
      float deskew_angle;
      tesseract::PageIterator* it = api.AnalyseLayout();
      if (it) {
        it->Orientation(&orientation, &direction, &order, &deskew_angle);
        tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
                "Deskew angle: %.4f\n",
                orientation, direction, order, deskew_angle);
      } else {
        ret_val = 1;
      }
      delete it;
    }
    pixDestroy(&pixs);
    exit(ret_val);
  }

  bool b;
  tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
  api.GetBoolVariable("tessedit_create_hocr", &b);
  if (b) {
    bool font_info;
    api.GetBoolVariable("hocr_font_info", &font_info);
    renderers.push_back(new tesseract::TessHOcrRenderer(outputbase, font_info));
  }
  api.GetBoolVariable("tessedit_create_pdf", &b);
  if (b) {
    renderers.push_back(new tesseract::TessPDFRenderer(outputbase,
                                                       api.GetDatapath()));
  }
  api.GetBoolVariable("tessedit_write_unlv", &b);
  if (b) renderers.push_back(new tesseract::TessUnlvRenderer(outputbase));
  api.GetBoolVariable("tessedit_create_boxfile", &b);
  if (b) renderers.push_back(new tesseract::TessBoxTextRenderer(outputbase));
  api.GetBoolVariable("tessedit_create_txt", &b);
  if (b) renderers.push_back(new tesseract::TessTextRenderer(outputbase));

  if (!renderers.empty()) {
    // Since the PointerVector auto-deletes, null-out the renderers that are
    // added to the root, and leave the root in the vector.
    for (int r = 1; r < renderers.size(); ++r) {
      renderers[0]->insert(renderers[r]);
      renderers[r] = NULL;
    }
    if (!api.ProcessPages(image, NULL, 0, renderers[0])) {
      fprintf(stderr, "Error during processing.\n");
      exit(1);
    }
  }

  PERF_COUNT_END
  return 0;                      // Normal exit
}
int main(int argc, char **argv) {
#ifdef USING_GETTEXT
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
#endif

  if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
      (argc == 2 && strcmp(argv[1], "--version") == 0)) {
    char *versionStrP;
    fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
    versionStrP = getLeptonicaVersion();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);
    versionStrP = getImagelibVersions();
    fprintf(stderr, " %s\n", versionStrP);
    lept_free(versionStrP);
    exit(0);
  }

  tesseract::TessBaseAPI api;

  int rc = api.Init(argv[0], NULL);
  if (rc) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
  }

  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
    GenericVector<STRING> languages;
    api.GetAvailableLanguagesAsVector(&languages);
    fprintf(stderr, "List of available languages (%d):\n", languages.size());
    for (int index = 0; index < languages.size(); ++index) {
      STRING& string = languages[index];
      fprintf(stderr, "%s\n", string.string());
    }
    api.Clear();
    exit(0);
  }
  api.End();

  // Make the order of args a bit more forgiving than it used to be.
  const char* lang = "eng";
  const char* image = NULL;
  const char* output = NULL;
  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
  int arg = 1;
  while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
    if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
      lang = argv[arg + 1];
      ++arg;
    } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
      pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
      ++arg;
    } else if (image == NULL) {
      image = argv[arg];
    } else if (output == NULL) {
      output = argv[arg];
    }
    ++arg;
  }

  if (output == NULL) {
    fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] "
                      "[-psm pagesegmode] [configfile...]\n\n"), argv[0]);
    fprintf(stderr,
            _("pagesegmode values are:\n"
              "0 = Orientation and script detection (OSD) only.\n"
              "1 = Automatic page segmentation with OSD.\n"
              "2 = Automatic page segmentation, but no OSD, or OCR\n"
              "3 = Fully automatic page segmentation, but no OSD. (Default)\n"
              "4 = Assume a single column of text of variable sizes.\n"
              "5 = Assume a single uniform block of vertically aligned text.\n"
              "6 = Assume a single uniform block of text.\n"
              "7 = Treat the image as a single text line.\n"
              "8 = Treat the image as a single word.\n"
              "9 = Treat the image as a single word in a circle.\n"
              "10 = Treat the image as a single character.\n"));
    fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any "
                      "configfile.\n\n"));
    fprintf(stderr, _("Single options:\n"));
    fprintf(stderr, _(" -v --version: version info\n"));
    fprintf(stderr,
            _(" --list-langs: list available languages for tesseract engine\n"));
    exit(1);
  }

  api.SetOutputName(output);

  STRING tessdata_dir;
  truncate_path(argv[0], &tessdata_dir);
  rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT,
                &(argv[arg]), argc - arg, NULL, NULL, false);
  if (rc) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
  }

  // We have 2 possible sources of pagesegmode: a config file and
  // the command line. For backwards compatibility reasons, the
  // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
  // default for this program is tesseract::PSM_AUTO. We will let
  // the config file take priority, so the command-line default
  // can take priority over the tesseract default, so we use the
  // value from the command line only if the retrieved mode
  // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
  // in any config file. Therefore the only way to force
  // tesseract::PSM_SINGLE_BLOCK is from the command line.
  // It would be simpler if we could set the value before Init,
  // but that doesn't work.
  if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
    api.SetPageSegMode(pagesegmode);

  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
          tesseract::TessBaseAPI::Version());

  FILE* fin = fopen(image, "rb");
  if (fin == NULL) {
    printf("Cannot open input file: %s\n", image);
    exit(2);
  }
  fclose(fin);

  PIX *pixs;
  if ((pixs = pixRead(image)) == NULL) {
    printf("Unsupported image type.\n");
    exit(3);
  }
  pixDestroy(&pixs);

  STRING text_out;
  if (!api.ProcessPages(image, NULL, 0, &text_out)) {
    printf("Error during processing.\n");
  }

  bool output_hocr = false;
  api.GetBoolVariable("tessedit_create_hocr", &output_hocr);
  bool output_box = false;
  api.GetBoolVariable("tessedit_create_boxfile", &output_box);
  STRING outfile = output;
  outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";
  FILE* fout = fopen(outfile.string(), "wb");
  if (fout == NULL) {
    printf("Cannot create output file %s\n", outfile.string());
    exit(1);
  }
  fwrite(text_out.string(), 1, text_out.length(), fout);
  fclose(fout);

  return 0;                      // Normal exit
}