// Computes an estimate of the line spacing of the block from the median // of the spacings between adjacent overlapping textlines. void BaselineBlock::EstimateLineSpacing() { GenericVector<float> spacings; for (int r = 0; r < rows_.size(); ++r) { BaselineRow* row = rows_[r]; // Exclude silly lines. if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue; // Find the first row after row that overlaps it significantly. const TBOX& row_box = row->bounding_box(); int r2; for (r2 = r + 1; r2 < rows_.size() && !row_box.major_x_overlap(rows_[r2]->bounding_box()); ++r2); if (r2 < rows_.size()) { BaselineRow* row2 = rows_[r2]; // Exclude silly lines. if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue; float spacing = row->SpaceBetween(*row2); spacings.push_back(spacing); } } // If we have at least one value, use it, otherwise leave the previous // value unchanged. if (!spacings.empty()) { line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)]; if (debug_level_ > 1) tprintf("Estimate of linespacing = %g\n", line_spacing_); } }
// Reads all boxes from the string. Otherwise, as ReadAllBoxes. bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, GenericVector<TBOX>* boxes, GenericVector<STRING>* texts, GenericVector<STRING>* box_texts, GenericVector<int>* pages) { STRING box_str(box_data); GenericVector<STRING> lines; box_str.split('\n', &lines); if (lines.empty()) return false; int num_boxes = 0; for (int i = 0; i < lines.size(); ++i) { int page = 0; STRING utf8_str; TBOX box; if (!ParseBoxFileStr(lines[i].string(), &page, &utf8_str, &box)) { continue; } if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) continue; if (target_page >= 0 && page != target_page) continue; if (boxes != NULL) boxes->push_back(box); if (texts != NULL) texts->push_back(utf8_str); if (box_texts != NULL) { STRING full_text; MakeBoxFileStr(utf8_str.string(), box, target_page, &full_text); box_texts->push_back(full_text); } if (pages != NULL) pages->push_back(page); ++num_boxes; } return num_boxes > 0; }
// Fits straight line baselines and computes the skew angle from the // median angle. Returns true if a good angle is found. // If use_box_bottoms is false, baseline positions are formed by // considering the outlines of the blobs. bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { if (non_text_block_) return false; GenericVector<double> angles; for (int r = 0; r < rows_.size(); ++r) { BaselineRow* row = rows_[r]; if (row->FitBaseline(use_box_bottoms)) { double angle = row->BaselineAngle(); angles.push_back(angle); } if (debug_level_ > 1) row->Print(); } if (!angles.empty()) { skew_angle_ = MedianOfCircularValues(M_PI, &angles); good_skew_angle_ = true; } else { skew_angle_ = 0.0f; good_skew_angle_ = false; } if (debug_level_ > 0) { tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, good_skew_angle_); } return good_skew_angle_; }
// Return the minimum number of bytes that matches a legal UNICHAR_ID, // while leaving the rest of the string encodable. Returns 0 if the // beginning of the string is not encodable. // WARNING: this function now encodes the whole string for precision. // Use encode_string in preference to repeatedly calling step. int UNICHARSET::step(const char* str) const { GenericVector<UNICHAR_ID> encoding; GenericVector<char> lengths; encode_string(str, true, &encoding, &lengths, NULL); if (encoding.empty() || encoding[0] == INVALID_UNICHAR_ID) return 0; return lengths[0]; }
//----------------------------------------------------------------------------- double dolfin::normalize(GenericVector& x, std::string normalization_type) { if (x.empty()) { dolfin_error("solve.cpp", "normalize vector", "Cannot normalize vector of zero length"); } double c = 0.0; if (normalization_type == "l2") { c = x.norm("l2"); x /= c; } else if (normalization_type == "average") { c = x.sum()/static_cast<double>(x.size()); x -= c; } else { dolfin_error("solve.cpp", "normalize vector", "Unknown normalization type (\"%s\")", normalization_type.c_str()); } return c; }
// Generates training data for training a line recognizer, eg LSTM. // Breaks the page into lines, according to the boxes, and writes them to a // serialized DocumentData based on output_basename. void Tesseract::TrainLineRecognizer(const STRING& input_imagename, const STRING& output_basename, BLOCK_LIST *block_list) { STRING lstmf_name = output_basename + ".lstmf"; DocumentData images(lstmf_name); if (applybox_page > 0) { // Load existing document for the previous pages. if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) { tprintf("Failed to read training data from %s!\n", lstmf_name.string()); return; } } GenericVector<TBOX> boxes; GenericVector<STRING> texts; // Get the boxes for this page, if there are any. if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, NULL, NULL) || boxes.empty()) { tprintf("Failed to read boxes from %s\n", input_imagename.string()); return; } TrainFromBoxes(boxes, texts, block_list, &images); images.Shuffle(); if (!images.SaveDocument(lstmf_name.string(), NULL)) { tprintf("Failed to write training data to %s!\n", lstmf_name.string()); } }
//----------------------------------------------------------------------------- void EpetraVector::gather(GenericVector& y, const std::vector<dolfin::la_index>& indices) const { dolfin_assert(_x); // Down cast to an EpetraVector EpetraVector& _y = as_type<EpetraVector>(y); // Create serial communicator Epetra_SerialComm epetra_serial_comm; // Create map for y Epetra_BlockMap target_map(indices.size(), indices.size(), indices.data(), 1, 0, epetra_serial_comm); // Initialise vector y if (y.empty()) _y.init(target_map); else if (_y.size() != indices.size() || MPI::size(y.mpi_comm())) { // FIXME: also check that vector is local dolfin_error("EpetraVector.cpp", "gather vector entries", "Cannot re-initialize gather vector. Must be empty, or have correct size and be a local vector"); } dolfin_assert(_y.vec()); // Create importer Epetra_Import importer(target_map, _x->Map()); // Import values into y _y.vec()->Import(*_x, importer, Insert); }
//----------------------------------------------------------------------------- void BelosKrylovSolver::check_dimensions(const TpetraMatrix& A, const GenericVector& x, const GenericVector& b) const { // Check dimensions of A if (A.size(0) == 0 || A.size(1) == 0) { dolfin_error("BelosKrylovSolver.cpp", "unable to solve linear system with Belos Krylov solver", "Matrix does not have a nonzero number of rows and columns"); } // Check dimensions of A vs b if (A.size(0) != b.size()) { dolfin_error("BelosKrylovSolver.cpp", "unable to solve linear system with Belos Krylov solver", "Non-matching dimensions for linear system (matrix has %ld rows and right-hand side vector has %ld rows)", A.size(0), b.size()); } // Check dimensions of A vs x if (!x.empty() && x.size() != A.size(1)) { dolfin_error("BelosKrylovSolver.cpp", "unable to solve linear system with Belos Krylov solver", "Non-matching dimensions for linear system (matrix has %ld columns and solution vector has %ld rows)", A.size(1), x.size()); } }
// Returns true if the given set of fonts includes multiple properties. bool FontInfoTable::SetContainsMultipleFontProperties( const GenericVector<int>& font_set) const { if (font_set.empty()) return false; int first_font = font_set[0]; uinT32 properties = get(first_font).properties; for (int f = 1; f < font_set.size(); ++f) { if (get(font_set[f]).properties != properties) return true; } return false; }
/** * @name print_seams * * Print a list of splits. Show the coordinates of both points in * each split. */ void print_seams(const char *label, const GenericVector<SEAM*>& seams) { char number[CHARS_PER_LINE]; if (!seams.empty()) { tprintf("%s\n", label); for (int x = 0; x < seams.size(); ++x) { sprintf(number, "%2d: ", x); print_seam(number, seams[x]); } tprintf("\n"); } }
// Splits txt into its lines and returns them as separately allocated,
// null-terminated C strings.
//
// A split happens after every '\n' that is not the last character of txt, so
// each returned line except possibly the last ends with its '\n'; a trailing
// '\n' at the very end of txt stays attached to the last line. Text without
// any split point (including the empty string) is returned as a single deep
// copy made by strDeepCpy.
//
// The lines produced here are allocated with new[]; nothing in this function
// releases them.
// NOTE(review): presumably the caller owns and must free the returned
// strings - confirm against the call sites.
GenericVector<char*> M_Utils::lineSplit(const char* txt) {
  const int txtlen = static_cast<int>(strlen(txt));

  GenericVector<char*> res;
  int line_start = 0;  // index of the first character of the current line

  // A newline in the final position does not start another line, so the
  // scan stops one character short of the end.
  for (int pos = 0; pos < txtlen - 1; ++pos) {
    if (txt[pos] != '\n') continue;
    const int line_len = pos - line_start + 1;  // includes the '\n'
    char* ln = new char[line_len + 1];          // +1 for null terminator
    memcpy(ln, txt + line_start, line_len);
    ln[line_len] = '\0';
    res.push_back(ln);
    // The next line begins right after the newline, not on it.
    line_start = pos + 1;
  }

  if (res.empty()) {
    // No split point found: hand back a deep copy of the whole string.
    res.push_back(strDeepCpy(txt));
    return res;
  }

  // Whatever follows the last split point is the final line.
  const int tail_len = txtlen - line_start;
  char* tail = new char[tail_len + 1];
  memcpy(tail, txt + line_start, tail_len);
  tail[tail_len] = '\0';
  res.push_back(tail);
  return res;
}
//----------------------------------------------------------------------------- std::size_t PETScLUSolver::solve(GenericVector& x, const GenericVector& b, bool transpose) { Timer timer("PETSc LU solver"); dolfin_assert(_ksp); dolfin_assert(_matA); PetscErrorCode ierr; // Downcast matrix and vectors const PETScVector& _b = as_type<const PETScVector>(b); PETScVector& _x = as_type<PETScVector>(x); // Check dimensions if (_matA->size(0) != b.size()) { dolfin_error("PETScLUSolver.cpp", "solve linear system using PETSc LU solver", "Cannot factorize non-square PETSc matrix"); } // Initialize solution vector if required (make compatible with A in // parallel) if (x.empty()) _matA->init_vector(x, 1); // Set PETSc operators (depends on factorization re-use options); //set_petsc_operators(); // Write a pre-solve message pre_report(*_matA); // Get package used to solve system PC pc; ierr = KSPGetPC(_ksp, &pc); if (ierr != 0) petsc_error(ierr, __FILE__, "KSPGetPC"); configure_ksp(_solver_package); // Set number of threads if using PaStiX if (strcmp(_solver_package, MATSOLVERPASTIX) == 0) { const std::size_t num_threads = parameters["num_threads"].is_set() ? parameters["num_threads"] : dolfin::parameters["num_threads"]; PETScOptions::set("-mat_pastix_threadnbr", num_threads); } // Solve linear system const Vec b_petsc = _b.vec(); Vec x_petsc = _x.vec(); if (!transpose) { ierr = KSPSolve(_ksp, b_petsc, x_petsc); if (ierr != 0) petsc_error(ierr, __FILE__, "KSPSolve"); } else { ierr = KSPSolveTranspose(_ksp, b_petsc, x_petsc); if (ierr != 0) petsc_error(ierr, __FILE__, "KSPSolveTranspose"); } // Update ghost values following solve _x.update_ghost_values(); return 1; }
//---------------------------------------------------------------------------- void LocalSolver::solve(GenericVector& x, const Form& a, const Form& L, bool symmetric) const { UFC ufc_a(a); UFC ufc_L(L); // Set timer Timer timer("Local solver"); // Extract mesh const Mesh& mesh = a.mesh(); // Form ranks const std::size_t rank_a = ufc_a.form.rank(); const std::size_t rank_L = ufc_L.form.rank(); // Check form ranks dolfin_assert(rank_a == 2); dolfin_assert(rank_L == 1); // Collect pointers to dof maps std::shared_ptr<const GenericDofMap> dofmap_a0 = a.function_space(0)->dofmap(); std::shared_ptr<const GenericDofMap> dofmap_a1 = a.function_space(1)->dofmap(); std::shared_ptr<const GenericDofMap> dofmap_L = a.function_space(0)->dofmap(); dolfin_assert(dofmap_a0); dolfin_assert(dofmap_a1); dolfin_assert(dofmap_L); // Initialise vector if (x.empty()) { std::pair<std::size_t, std::size_t> local_range = dofmap_L->ownership_range(); x.init(mesh.mpi_comm(), local_range); } // Cell integrals ufc::cell_integral* integral_a = ufc_a.default_cell_integral.get(); ufc::cell_integral* integral_L = ufc_L.default_cell_integral.get(); // Eigen data structures Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> A; Eigen::VectorXd b, x_local; // Assemble over cells Progress p("Performing local (cell-wise) solve", mesh.num_cells()); ufc::cell ufc_cell; std::vector<double> vertex_coordinates; for (CellIterator cell(mesh); !cell.end(); ++cell) { // Update to current cell cell->get_vertex_coordinates(vertex_coordinates); cell->get_cell_data(ufc_cell); ufc_a.update(*cell, vertex_coordinates, ufc_cell, integral_a->enabled_coefficients()); ufc_L.update(*cell, vertex_coordinates, ufc_cell, integral_L->enabled_coefficients()); // Get local-to-global dof maps for cell const std::vector<dolfin::la_index>& dofs_a0 = dofmap_a0->cell_dofs(cell->index()); const std::vector<dolfin::la_index>& dofs_a1 = dofmap_a1->cell_dofs(cell->index()); const std::vector<dolfin::la_index>& dofs_L = 
dofmap_L->cell_dofs(cell->index()); // Check that local problem is square and a and L match dolfin_assert(dofs_a0.size() == dofs_a1.size()); dolfin_assert(dofs_a1.size() == dofs_L.size()); // Resize A and b A.resize(dofs_a0.size(), dofs_a1.size()); b.resize(dofs_L.size()); // Tabulate A and b on cell integral_a->tabulate_tensor(A.data(), ufc_a.w(), vertex_coordinates.data(), ufc_cell.orientation); integral_L->tabulate_tensor(b.data(), ufc_L.w(), vertex_coordinates.data(), ufc_cell.orientation); // Solve local problem x_local = A.partialPivLu().solve(b); // Set solution in global vector x.set(x_local.data(), dofs_a0.size(), dofs_a0.data()); p++; } // Finalise vector x.apply("insert"); }