// Helper returns true if the given string is in the vector of strings. static bool IsStrInList(const STRING& str, const GenericVector<STRING>& str_list) { for (int i = 0; i < str_list.size(); ++i) { if (str_list[i] == str) return true; } return false; }
// Helper function to get the index of the first result with the required // unichar_id. If the results are sorted by rating, this will also be the // best result with the required unichar_id. // Returns -1 if the unichar_id is not found int UnicharRating::FirstResultWithUnichar( const GenericVector<UnicharRating>& results, UNICHAR_ID unichar_id) { for (int r = 0; r < results.size(); ++r) { if (results[r].unichar_id == unichar_id) return r; } return -1; }
/**
 * @name any_shared_split_points
 *
 * Returns TRUE if any seam in the given vector shares a split position
 * with this seam, FALSE otherwise.
 */
static int any_shared_split_points(const GenericVector<SEAM*>& seams,
                                   SEAM *seam) {
  const int num_seams = seams.size();
  for (int s = 0; s < num_seams; ++s) {
    if (seam->SharesPosition(*seams[s]))
      return TRUE;
  }
  return FALSE;
}
// Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames, const char* lang, FileReader reader) { inT64 fair_share_memory = max_memory_ / filenames.size(); for (int arg = 0; arg < filenames.size(); ++arg) { STRING filename = filenames[arg]; DocumentData* document = new DocumentData(filename); if (document->LoadDocument(filename.string(), lang, 0, fair_share_memory, reader)) { AddToCache(document); } else { tprintf("Failed to load image %s!\n", filename.string()); delete document; } } tprintf("Loaded %d pages, total %gMB\n", total_pages_, memory_used_ / 1048576.0); return total_pages_ > 0; }
//----------------------------------------------------------------------------- double dolfin::residual(const GenericLinearOperator& A, const GenericVector& x, const GenericVector& b) { std::shared_ptr<GenericVector> y = x.factory().create_vector(); A.mult(x, *y); *y -= b; return y->norm("l2"); }
// Computes the maximum x and y value in the features. void WordFeature::ComputeSize(const GenericVector<WordFeature>& features, int* max_x, int* max_y) { *max_x = 0; *max_y = 0; for (int f = 0; f < features.size(); ++f) { if (features[f].x_ > *max_x) *max_x = features[f].x_; if (features[f].y_ > *max_y) *max_y = features[f].y_; } }
// Returns true if the given set of fonts includes one with the same // properties as font_id. bool FontInfoTable::SetContainsFontProperties( int font_id, const GenericVector<int>& font_set) const { uinT32 properties = get(font_id).properties; for (int f = 0; f < font_set.size(); ++f) { if (get(font_set[f]).properties == properties) return true; } return false; }
void Ocr::TesseractApi::initialize(const std::vector<std::string>& ll) { if (api) { api->End(); delete api; } if(!api) api = new tesseract::TessBaseAPI(); std::string langConcat; if (ll.size() > 0) // concat languages { auto iter = ll.begin(); langConcat = *iter; for (; iter != ll.end(); ++iter) { langConcat += "+" + *iter; } } else { langConcat = "eng"; // default } //api->GetAvailableLanguagesAsVector() QString languagePath = (QDir::currentPath() + "/plugins"); if (api->Init(languagePath.toStdString().c_str(), langConcat.c_str())) { QMessageBox messageBox; messageBox.critical(0, "Error", QString("Could not load language files from: ") + languagePath + " (https://github.com/tesseract-ocr/tessdata)"); messageBox.setFixedSize(500, 200); } GenericVector<STRING> languages; api->GetAvailableLanguagesAsVector(&languages); for (int index = 0; index < languages.size(); ++index) { STRING& string = languages[index]; QString str(string.string()); availableLanguages.push_back(str); } }
//----------------------------------------------------------------------------- std::size_t dolfin::solve(const GenericLinearOperator& A, GenericVector& x, const GenericVector& b, std::string method, std::string preconditioner) { Timer timer("Solving linear system"); LinearSolver solver(x.mpi_comm(), method, preconditioner); return solver.solve(A, x, b); }
// Tests a classifier, computing its error rate.
// See errorcounter.h for description of arguments.
// Iterates over the samples, calling the classifier in normal/silent mode.
// If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate
// report_level is set (4 or greater), it will then call the classifier again
// with a debug flag and a keep_this argument to find out what is going on.
// Returns the unscaled error rate; *unichar_error, *scaled_error and
// *fonts_report are filled in by ReportErrors (see errorcounter.h).
double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier,
    int report_level, CountTypes boosting_mode,
    const UnicityTable<FontInfo>& fontinfo_table,
    const GenericVector<Pix*>& page_images, SampleIterator* it,
    double* unichar_error,  double* scaled_error, STRING* fonts_report) {
  // Size the counter from the iterator's charset, compact charset and fonts.
  int charsetsize = it->shape_table()->unicharset().size();
  int shapesize = it->CompactCharsetSize();
  int fontsize = it->sample_set()->NumFonts();
  ErrorCounter counter(charsetsize, shapesize, fontsize);
  GenericVector<ShapeRating> results;

  clock_t start = clock();
  int total_samples = 0;
  double unscaled_error = 0.0;
  // Set a number of samples on which to run the classify debug mode.
  int error_samples = report_level > 3 ? report_level * report_level : 0;
  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    int page_index = mutable_sample->page_num();
    // Out-of-range page indices get a NULL pix (classifier must accept it).
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                  ? page_images[page_index] : NULL;
    // No debug, no keep this.
    classifier->ClassifySample(*mutable_sample, page_pix, 0,
                               INVALID_UNICHAR_ID, &results);
    if (mutable_sample->class_id() == 0) {
      // This is junk so use the special counter.
      counter.AccumulateJunk(*it->shape_table(), results, mutable_sample);
    } else if (counter.AccumulateErrors(report_level > 3, boosting_mode,
                                        fontinfo_table, *it->shape_table(),
                                        results, mutable_sample) &&
               error_samples > 0) {
      // Running debug, keep the correct answer, and debug the classifier.
      tprintf("Error on sample %d: Classifier debug output:\n",
              it->GlobalSampleIndex());
      int keep_this = it->GetSparseClassID();
      // Re-run in debug mode (3rd arg = 1) keeping the correct answer.
      classifier->ClassifySample(*mutable_sample, page_pix, 1, keep_this,
                                 &results);
      --error_samples;
    }
    ++total_samples;
  }
  double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
  // Create the appropriate error report.
  unscaled_error = counter.ReportErrors(report_level, boosting_mode,
                                        fontinfo_table,
                                        *it, unichar_error, fonts_report);
  if (scaled_error != NULL) *scaled_error = counter.scaled_error_;
  if (report_level > 1) {
    // It is useful to know the time in microseconds/char.
    tprintf("Errors computed in %.2fs at %.1f μs/char\n",
            total_time, 1000000.0 * total_time / total_samples);
  }
  return unscaled_error;
}
// Displays the segmentation state of *this (if not the same as the last // one displayed) and waits for a click in the window. void WERD_CHOICE::DisplaySegmentation(TWERD* word) { #ifndef GRAPHICS_DISABLED // Number of different colors to draw with. const int kNumColors = 6; static ScrollView *segm_window = NULL; // Check the state against the static prev_drawn_state. static GenericVector<int> prev_drawn_state; bool already_done = prev_drawn_state.size() == length_; if (!already_done) prev_drawn_state.init_to_size(length_, 0); for (int i = 0; i < length_; ++i) { if (prev_drawn_state[i] != state_[i]) { already_done = false; } prev_drawn_state[i] = state_[i]; } if (already_done || word->blobs.empty()) return; // Create the window if needed. if (segm_window == NULL) { segm_window = new ScrollView("Segmentation", 5, 10, 500, 256, 2000.0, 256.0, true); } else { segm_window->Clear(); } TBOX bbox; int blob_index = 0; for (int c = 0; c < length_; ++c) { ScrollView::Color color = static_cast<ScrollView::Color>(c % kNumColors + 3); for (int i = 0; i < state_[c]; ++i, ++blob_index) { TBLOB* blob = word->blobs[blob_index]; bbox += blob->bounding_box(); blob->plot(segm_window, color, color); } } segm_window->ZoomToRectangle(bbox.left(), bbox.top(), bbox.right(), bbox.bottom()); segm_window->Update(); window_wait(segm_window); #endif }
//----------------------------------------------------------------------------- void uBLASVector::axpy(double a, const GenericVector& y) { if (size() != y.size()) { dolfin_error("uBLASVector.cpp", "perform axpy operation with uBLAS vector", "Vectors are not of the same size"); } (*_x) += a * as_type<const uBLASVector>(y).vec(); }
// Adds the supplied boxes and transcriptions that correspond to the correct // page number. void ImageData::AddBoxes(const GenericVector<TBOX>& boxes, const GenericVector<STRING>& texts, const GenericVector<int>& box_pages) { // Copy the boxes and make the transcription. for (int i = 0; i < box_pages.size(); ++i) { if (page_number_ >= 0 && box_pages[i] != page_number_) continue; transcription_ += texts[i]; boxes_.push_back(boxes[i]); box_texts_.push_back(texts[i]); } }
// Converts an array of labels to utf-8, whether or not the labels are // augmented with character boundaries. STRING LSTMRecognizer::DecodeLabels(const GenericVector<int>& labels) { STRING result; int end = 1; for (int start = 0; start < labels.size(); start = end) { if (labels[start] == null_char_) { end = start + 1; } else { result += DecodeLabel(labels, start, &end, NULL); } } return result; }
void ShapeClassifier::PrintResults( const char* context, const GenericVector<ShapeRating>& results) const { tprintf("%s\n", context); for (int i = 0; i < results.size(); ++i) { tprintf("%g:", results[i].rating); if (results[i].joined) tprintf("[J]"); if (results[i].broken) tprintf("[B]"); tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string()); } }
// Prints a one-letter code per direction entry (N/L/R/Z, '?' for unknown),
// space-separated, followed by a newline.
static void PrintScriptDirs(const GenericVector<StrongScriptDirection> &dirs) {
  for (int i = 0; i < dirs.size(); i++) {
    const char *label;
    switch (dirs[i]) {
      case DIR_NEUTRAL:       label = "N "; break;
      case DIR_LEFT_TO_RIGHT: label = "L "; break;
      case DIR_RIGHT_TO_LEFT: label = "R "; break;
      case DIR_MIX:           label = "Z "; break;
      default:                label = "? "; break;
    }
    tprintf("%s", label);
  }
  tprintf("\n");
}
void FloatWordFeature::FromWordFeatures( const GenericVector<WordFeature>& word_features, GenericVector<FloatWordFeature>* float_features) { for (int i = 0; i < word_features.size(); ++i) { FloatWordFeature f; f.x = word_features[i].x(); f.y = word_features[i].y(); f.dir = word_features[i].dir(); f.x_bucket = 0; // Will set it later. float_features->push_back(f); } }
// Collects the strong script direction of every word on the current text
// line of resit (in strict left-to-right physical order) and delegates to
// the vector-based CalculateTextlineOrder overload to compute the reading
// order, returned as indices in *word_indices.
// If dirs_arg is non-NULL it receives the per-word directions; otherwise a
// local scratch vector is used.
void ResultIterator::CalculateTextlineOrder(
    bool paragraph_is_ltr,
    const LTRResultIterator &resit,
    GenericVector<StrongScriptDirection> *dirs_arg,
    GenericVectorEqEq<int> *word_indices) const {
  GenericVector<StrongScriptDirection> dirs;
  GenericVector<StrongScriptDirection> *directions;
  directions = (dirs_arg != NULL) ? dirs_arg : &dirs;
  directions->truncate(0);

  // A LTRResultIterator goes strictly left-to-right word order.
  LTRResultIterator ltr_it(resit);
  ltr_it.RestartRow();
  if (ltr_it.Empty(RIL_WORD)) return;
  // Walk the words of this line only; stop when the iterator wraps to the
  // start of the next text line.
  do {
    directions->push_back(ltr_it.WordDirection());
  } while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE));

  word_indices->truncate(0);
  CalculateTextlineOrder(paragraph_is_ltr, *directions, word_indices);
}
// Pre-classifies in parallel the blobs of all the words in the given list,
// storing the results in each word's ratings matrix.
void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
  // Prepare all the blobs.
  GenericVector<BlobData> blobs;
  for (int w = 0; w < words.size(); ++w) {
    // Only words with an allocated but still-empty ratings matrix need work.
    if (words[w].word->ratings != NULL &&
        words[w].word->ratings->get(0, 0) == NULL) {
      // Blobs classified with this (the main) language.
      for (int b = 0; b < words[w].word->chopped_word->NumBlobs(); ++b) {
        blobs.push_back(BlobData(b, this, *words[w].word));
      }
      // Blobs of the per-sub-language copies of the word; lang_words[s] is
      // classified with sub_langs_[s].
      for (int s = 0; s < words[w].lang_words.size(); ++s) {
        const WERD_RES& word = words[w].lang_words[s];
        for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
          blobs.push_back(BlobData(b, sub_langs_[s], word));
        }
      }
    }
  }
  // Pre-classify all the blobs.
  if (tessedit_parallelize > 1) {
    // Parallel path: each blob is classified independently, so iterations
    // are free of shared mutable state.
    #pragma omp parallel for num_threads(10)
    for (int b = 0; b < blobs.size(); ++b) {
      *blobs[b].choices =
          blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
    }
  } else {
    // TODO(AMD) parallelize this.
    // Serial fallback: identical body to the parallel loop above.
    for (int b = 0; b < blobs.size(); ++b) {
      *blobs[b].choices =
          blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
    }
  }
}
// Set a whitelist and/or blacklist of characters to recognize. // An empty or NULL whitelist enables everything (minus any blacklist). // An empty or NULL blacklist disables nothing. // An empty or NULL blacklist has no effect. void UNICHARSET::set_black_and_whitelist(const char* blacklist, const char* whitelist, const char* unblacklist) { bool def_enabled = whitelist == NULL || whitelist[0] == '\0'; // Set everything to default for (int ch = 0; ch < size_used; ++ch) unichars[ch].properties.enabled = def_enabled; if (!def_enabled) { // Enable the whitelist. GenericVector<UNICHAR_ID> encoding; encode_string(whitelist, false, &encoding, NULL, NULL); for (int i = 0; i < encoding.size(); ++i) { if (encoding[i] != INVALID_UNICHAR_ID) unichars[encoding[i]].properties.enabled = true; } } if (blacklist != NULL && blacklist[0] != '\0') { // Disable the blacklist. GenericVector<UNICHAR_ID> encoding; encode_string(blacklist, false, &encoding, NULL, NULL); for (int i = 0; i < encoding.size(); ++i) { if (encoding[i] != INVALID_UNICHAR_ID) unichars[encoding[i]].properties.enabled = false; } } if (unblacklist != NULL && unblacklist[0] != '\0') { // Re-enable the unblacklist. GenericVector<UNICHAR_ID> encoding; encode_string(unblacklist, false, &encoding, NULL, NULL); for (int i = 0; i < encoding.size(); ++i) { if (encoding[i] != INVALID_UNICHAR_ID) unichars[encoding[i]].properties.enabled = true; } } }
// Computes the features used by the subset of samples defined by // the iterator and sets up the feature mapping. // Returns the size of the compacted feature space. int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) { feature_map_.Init(feature_space_.Size(), false); int total_samples = 0; for (it->Begin(); !it->AtEnd(); it->Next()) { const TrainingSample& sample = it->GetSample(); GenericVector<int> features; feature_space_.IndexAndSortFeatures(sample.features(), sample.num_features(), &features); int num_features = features.size(); for (int f = 0; f < num_features; ++f) feature_map_.SetMap(features[f], true); ++total_samples; } feature_map_.Setup(); compact_size_ = feature_map_.CompactSize(); mapping_changed_ = true; FinalizeMapping(it); tprintf("%d non-zero features found in %d samples\n", compact_size_, total_samples); return compact_size_; }
//----------------------------------------------------------------------------- void uBLASVector::gather(GenericVector& x, const std::vector<dolfin::la_index>& indices) const { not_working_in_parallel("uBLASVector::gather)"); const std::size_t _size = indices.size(); dolfin_assert(this->size() >= _size); x.resize(_size); ublas_vector& tmp = as_type<uBLASVector>(x).vec(); for (std::size_t i = 0; i < _size; i++) tmp(i) = (*_x)(indices[i]); }
bool Wordrec::ChoiceIsCorrect(const UNICHARSET &uni_set, const WERD_CHOICE *choice, const GenericVector<STRING> &truth_text) { if (choice == NULL) return false; int i; STRING truth_str; for (i = 0; i < truth_text.length(); ++i) truth_str += truth_text[i]; STRING normed_choice_str; for (i = 0; i < choice->length(); ++i) { normed_choice_str += uni_set.get_normed_unichar(choice->unichar_id(i)); } return (truth_str == normed_choice_str); }
// Draws the features in the given window. void WordFeature::Draw(const GenericVector<WordFeature>& features, ScrollView* window) { for (int f = 0; f < features.size(); ++f) { FCOORD pos(features[f].x_, features[f].y_); FCOORD dir; dir.from_direction(features[f].dir_); dir *= 8.0f; window->SetCursor(IntCastRounded(pos.x() - dir.x()), IntCastRounded(pos.y() - dir.y())); window->DrawTo(IntCastRounded(pos.x() + dir.x()), IntCastRounded(pos.y() + dir.y())); } }
// Helper function to get the index of the first result with the required // unichar_id. If the results are sorted by rating, this will also be the // best result with the required unichar_id. // Returns -1 if the unichar_id is not found int ShapeRating::FirstResultWithUnichar( const GenericVector<ShapeRating>& results, const ShapeTable& shape_table, UNICHAR_ID unichar_id) { for (int r = 0; r < results.size(); ++r) { int shape_id = results[r].shape_id; const Shape& shape = shape_table.GetShape(shape_id); if (shape.ContainsUnichar(unichar_id)) { return r; } } return -1; }
// Prints debug information on the results. void ShapeClassifier::UnicharPrintResults( const char* context, const GenericVector<UnicharRating>& results) const { tprintf("%s\n", context); for (int i = 0; i < results.size(); ++i) { tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id, GetUnicharset().id_to_unichar(results[i].unichar_id)); if (results[i].fonts.size() != 0) { tprintf(" Font Vector:"); for (int f = 0; f < results[i].fonts.size(); ++f) { tprintf(" %d", results[i].fonts[f].fontinfo_id); } } tprintf("\n"); } }
// Splits the given text at newlines into one heap-allocated C-string per
// line. Every line except the last keeps its trailing '\n'; a trailing
// newline on the input is kept on the final line rather than producing an
// empty extra line. The caller owns (and must delete[]) the returned strings.
GenericVector<char*> M_Utils::lineSplit(const char* txt) {
  int txtlen = (int)strlen(txt);
  // pass 1: find split points (newlines that are not the final character)
  GenericVector<int> splitpoints;
  for (int i = 0; i < txtlen; i++) {
    if (txt[i] == '\n' && (i < (txtlen - 1)))
      splitpoints.push_back(i);
  }
  GenericVector<char*> res;
  if (splitpoints.empty()) {
    // no interior newline: deep copy the whole string as the single line
    res.push_back(strDeepCpy(txt));
    return res;
  }
  // pass 2: copy out each line ending at a split point.
  // BUG FIX: the original called splitpoints.clear() inside this loop, which
  // emptied the vector being iterated and stopped after the first line.
  // BUG FIX: prevsplit now advances past the newline (split + 1); the
  // original restarted at the newline itself, duplicating it at the head of
  // the following line.
  int prevsplit = 0;
  for (int i = 0; i < splitpoints.length(); i++) {
    const int split = splitpoints[i];
    const int newstrsize = split - prevsplit;
    char* ln = new char[newstrsize + 2];  // +1 for newline, +1 for terminator
    for (int j = 0; j < newstrsize; j++)
      ln[j] = txt[prevsplit + j];
    ln[newstrsize] = '\n';
    ln[newstrsize + 1] = '\0';  // null terminator
    res.push_back(ln);
    prevsplit = split + 1;
  }
  // now just need to add the last line
  const int newstrsize = txtlen - prevsplit;
  char* ln = new char[newstrsize + 1];
  for (int j = 0; j < newstrsize; j++)
    ln[j] = txt[prevsplit + j];
  ln[newstrsize] = '\0';
  res.push_back(ln);
  return res;
}
/**
 * @name test_insert_seam
 *
 * @returns true if insert_seam will succeed.
 *
 * Checks the seams on either side of the proposed insertion index:
 * seams before the index are tested with their right-extent (widthp),
 * seams at/after it with their left-extent (widthn). A seam fails if its
 * width reaches the insertion point and account_splits reports a negative
 * blob accounting for it.
 * NOTE(review): each condition pair below looks partially redundant (e.g.
 * `a < index && a == index - 1` — the equality implies the inequality);
 * presumably kept from an older formulation — confirm before simplifying.
 */
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
                      TWERD *word, int index) {
  SEAM *test_seam;
  int list_length = seam_array.size();
  // Seams strictly before the insertion point: check right-extent overlap.
  for (int test_index = 0; test_index < index; ++test_index) {
    test_seam = seam_array[test_index];
    if (test_index + test_seam->widthp < index &&
        test_seam->widthp + test_index == index - 1 &&
        account_splits(test_seam, word, test_index + 1, 1) < 0)
      return false;
  }
  // Seams at or after the insertion point: check left-extent overlap.
  for (int test_index = index; test_index < list_length; test_index++) {
    test_seam = seam_array[test_index];
    if (test_index - test_seam->widthn >= index &&
        test_index - test_seam->widthn == index &&
        account_splits(test_seam, word, test_index + 1, -1) < 0)
      return false;
  }
  return true;
}
// Accumulates counts for junk. Counts only whether the junk was correctly // rejected or not. void ErrorCounter::AccumulateJunk(const ShapeTable& shape_table, const GenericVector<ShapeRating>& results, TrainingSample* sample) { // For junk we accept no answer, or an explicit shape answer matching the // class id of the sample. int num_results = results.size(); int font_id = sample->font_id(); int unichar_id = sample->class_id(); if (num_results > 0 && !shape_table.GetShape(results[0].shape_id).ContainsUnichar(unichar_id)) { // This is a junk error. ++font_counts_[font_id].n[CT_ACCEPTED_JUNK]; sample->set_is_error(true); // It counts as an error for boosting too so sum the weight. scaled_error_ += sample->weight(); } else { // Correctly rejected. ++font_counts_[font_id].n[CT_REJECTED_JUNK]; sample->set_is_error(false); } }
// Function to compute the near nullspace for elasticity - it is made // up of the six rigid body modes dolfin::VectorSpaceBasis build_nullspace(const dolfin::FunctionSpace& V, const GenericVector& x) { // Get subspaces auto V0 = V.sub(0); auto V1 = V.sub(1); auto V2 = V.sub(2); // Create vectors for nullspace basis std::vector<std::shared_ptr<dolfin::GenericVector>> basis(6); for (std::size_t i = 0; i < basis.size(); ++i) basis[i] = x.copy(); // x0, x1, x2 translations V0->dofmap()->set(*basis[0], 1.0); V1->dofmap()->set(*basis[1], 1.0); V2->dofmap()->set(*basis[2], 1.0); // Rotations V0->set_x(*basis[3], -1.0, 1); V1->set_x(*basis[3], 1.0, 0); V0->set_x(*basis[4], 1.0, 2); V2->set_x(*basis[4], -1.0, 0); V2->set_x(*basis[5], 1.0, 1); V1->set_x(*basis[5], -1.0, 2); // Apply for (std::size_t i = 0; i < basis.size(); ++i) basis[i]->apply("add"); // Create vector space and orthonormalize VectorSpaceBasis vector_space(basis); vector_space.orthonormalize(); return vector_space; }