Пример #1
0
// Computes an estimate of the line spacing of the block from the median
// of the spacings between adjacent overlapping textlines.
void BaselineBlock::EstimateLineSpacing() {
  GenericVector<float> spacings;
  for (int r = 0; r < rows_.size(); ++r) {
    BaselineRow* row = rows_[r];
    // Exclude silly lines.
    if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
    // Find the first row after row that overlaps it significantly.
    const TBOX& row_box = row->bounding_box();
    int r2;
    for (r2 = r + 1; r2 < rows_.size() &&
         !row_box.major_x_overlap(rows_[r2]->bounding_box());
         ++r2);
    if (r2 < rows_.size()) {
      BaselineRow* row2 = rows_[r2];
      // Exclude silly lines.
      if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
      float spacing = row->SpaceBetween(*row2);
      spacings.push_back(spacing);
    }
  }
  // If we have at least one value, use it, otherwise leave the previous
  // value unchanged.
  if (!spacings.empty()) {
    line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
    if (debug_level_ > 1)
      tprintf("Estimate of linespacing = %g\n", line_spacing_);
  }
}
Пример #2
0
// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
                  GenericVector<TBOX>* boxes,
                  GenericVector<STRING>* texts,
                  GenericVector<STRING>* box_texts,
                  GenericVector<int>* pages) {
  STRING box_str(box_data);
  GenericVector<STRING> lines;
  box_str.split('\n', &lines);
  if (lines.empty()) return false;
  int num_boxes = 0;
  for (int i = 0; i < lines.size(); ++i) {
    int page = 0;
    STRING utf8_str;
    TBOX box;
    if (!ParseBoxFileStr(lines[i].string(), &page, &utf8_str, &box)) {
      continue;
    }
    if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) continue;
    if (target_page >= 0 && page != target_page) continue;
    if (boxes != NULL) boxes->push_back(box);
    if (texts != NULL) texts->push_back(utf8_str);
    if (box_texts != NULL) {
      STRING full_text;
      MakeBoxFileStr(utf8_str.string(), box, target_page, &full_text);
      box_texts->push_back(full_text);
    }
    if (pages != NULL) pages->push_back(page);
    ++num_boxes;
  }
  return num_boxes > 0;
}
Пример #3
0
// Fits straight line baselines and computes the skew angle from the
// median angle. Returns true if a good angle is found.
// If use_box_bottoms is false, baseline positions are formed by
// considering the outlines of the blobs.
bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
  if (non_text_block_) return false;
  GenericVector<double> angles;
  for (int r = 0; r < rows_.size(); ++r) {
    BaselineRow* row = rows_[r];
    if (row->FitBaseline(use_box_bottoms)) {
      double angle = row->BaselineAngle();
      angles.push_back(angle);
    }
    if (debug_level_ > 1)
      row->Print();
  }

  if (!angles.empty()) {
    skew_angle_ = MedianOfCircularValues(M_PI, &angles);
    good_skew_angle_ = true;
  } else {
    skew_angle_ = 0.0f;
    good_skew_angle_ = false;
  }
  if (debug_level_ > 0) {
    tprintf("Initial block skew angle = %g, good = %d\n",
            skew_angle_, good_skew_angle_);
  }
  return good_skew_angle_;
}
Пример #4
0
// Return the minimum number of bytes that matches a legal UNICHAR_ID,
// while leaving the rest of the string encodable. Returns 0 if the
// beginning of the string is not encodable.
// WARNING: this function now encodes the whole string for precision.
// Use encode_string in preference to repeatedly calling step.
int UNICHARSET::step(const char* str) const {
  GenericVector<UNICHAR_ID> encoding;
  GenericVector<char> lengths;
  encode_string(str, true, &encoding, &lengths, NULL);
  if (encoding.empty() || encoding[0] == INVALID_UNICHAR_ID) return 0;
  return lengths[0];
}
Пример #5
0
//-----------------------------------------------------------------------------
double dolfin::normalize(GenericVector& x, std::string normalization_type)
{
  if (x.empty())
  {
    dolfin_error("solve.cpp",
                 "normalize vector",
                 "Cannot normalize vector of zero length");
  }

  double c = 0.0;
  if (normalization_type == "l2")
  {
    c = x.norm("l2");
    x /= c;
  }
  else if (normalization_type == "average")
  {
    c = x.sum()/static_cast<double>(x.size());
    x -= c;
  }
  else
  {
    dolfin_error("solve.cpp",
                 "normalize vector",
                 "Unknown normalization type (\"%s\")",
                 normalization_type.c_str());
  }

  return c;
}
Пример #6
0
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the page into lines, according to the boxes, and writes them to a
// serialized DocumentData based on output_basename.
void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
                                    const STRING& output_basename,
                                    BLOCK_LIST *block_list) {
  STRING lstmf_name = output_basename + ".lstmf";
  DocumentData images(lstmf_name);
  if (applybox_page > 0) {
    // Load existing document for the previous pages.
    if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) {
      tprintf("Failed to read training data from %s!\n", lstmf_name.string());
      return;
    }
  }
  GenericVector<TBOX> boxes;
  GenericVector<STRING> texts;
  // Get the boxes for this page, if there are any.
  if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, NULL,
                    NULL) ||
      boxes.empty()) {
    tprintf("Failed to read boxes from %s\n", input_imagename.string());
    return;
  }
  TrainFromBoxes(boxes, texts, block_list, &images);
  images.Shuffle();
  if (!images.SaveDocument(lstmf_name.string(), NULL)) {
    tprintf("Failed to write training data to %s!\n", lstmf_name.string());
  }
}
Пример #7
0
//-----------------------------------------------------------------------------
void EpetraVector::gather(GenericVector& y,
                          const std::vector<dolfin::la_index>& indices) const
{
  dolfin_assert(_x);

  // Down cast to an EpetraVector
  EpetraVector& _y = as_type<EpetraVector>(y);

  // Create serial communicator
  Epetra_SerialComm epetra_serial_comm;

  // Create map for y
  Epetra_BlockMap target_map(indices.size(), indices.size(), indices.data(),
                             1, 0, epetra_serial_comm);

  // Initialise vector y
  if (y.empty())
    _y.init(target_map);
  else if (_y.size() != indices.size() ||  MPI::size(y.mpi_comm()))
  {
    // FIXME: also check that vector is local
    dolfin_error("EpetraVector.cpp",
                 "gather vector entries",
                 "Cannot re-initialize gather vector. Must be empty, or have correct size and be a local vector");
  }

  dolfin_assert(_y.vec());

  // Create importer
  Epetra_Import importer(target_map, _x->Map());

  // Import values into y
  _y.vec()->Import(*_x, importer, Insert);
}
Пример #8
0
//-----------------------------------------------------------------------------
void BelosKrylovSolver::check_dimensions(const TpetraMatrix& A,
                                         const GenericVector& x,
                                         const GenericVector& b) const
{
  // Check dimensions of A
  if (A.size(0) == 0 || A.size(1) == 0)
  {
    dolfin_error("BelosKrylovSolver.cpp",
                 "unable to solve linear system with Belos Krylov solver",
                 "Matrix does not have a nonzero number of rows and columns");
  }

  // Check dimensions of A vs b
  if (A.size(0) != b.size())
  {
    dolfin_error("BelosKrylovSolver.cpp",
                 "unable to solve linear system with Belos Krylov solver",
                 "Non-matching dimensions for linear system (matrix has %ld rows and right-hand side vector has %ld rows)",
                 A.size(0), b.size());
  }

  // Check dimensions of A vs x
  if (!x.empty() && x.size() != A.size(1))
  {
    dolfin_error("BelosKrylovSolver.cpp",
                 "unable to solve linear system with Belos Krylov solver",
                 "Non-matching dimensions for linear system (matrix has %ld columns and solution vector has %ld rows)",
                 A.size(1), x.size());
  }

}
Пример #9
0
// Returns true if the given set of fonts includes multiple properties.
bool FontInfoTable::SetContainsMultipleFontProperties(
    const GenericVector<int>& font_set) const {
  if (font_set.empty()) return false;
  int first_font = font_set[0];
  uinT32 properties = get(first_font).properties;
  for (int f = 1; f < font_set.size(); ++f) {
    if (get(font_set[f]).properties != properties)
      return true;
  }
  return false;
}
Пример #10
0
/**
 * @name print_seams
 *
 * Print a list of splits.  Show the coordinates of both points in
 * each split.
 */
void print_seams(const char *label, const GenericVector<SEAM*>& seams) {
  char number[CHARS_PER_LINE];

  if (!seams.empty()) {
    tprintf("%s\n", label);
    for (int x = 0; x < seams.size(); ++x) {
      sprintf(number, "%2d:   ", x);
      print_seam(number, seams[x]);
    }
    tprintf("\n");
  }
}
Пример #11
0
GenericVector<char*> M_Utils::lineSplit(const char* txt) {
  int txtlen = (int)strlen(txt);
  // pass 1: find split points
  GenericVector<int> splitpoints;
  for(int i = 0; i < txtlen; i++) {
    if(txt[i] == '\n' && (i < (txtlen-1)))
      splitpoints.push_back(i);
  }
  // pass 2: iterate split points to do all the splitting
  int prevsplit = 0;
  GenericVector<char*> res;
  if(splitpoints.empty()) {
    // deep copy the string
    char* newstr = strDeepCpy(txt);
    res.push_back(newstr);
    return res;
  }
  for(int i = 0; i < splitpoints.length(); i++) {
    int split = splitpoints[i];
    int newstrsize = split-prevsplit;
    char* ln = new char[newstrsize+2]; // +1 for null terminator and +1 for newline
    for(int i = 0; i < newstrsize; i++)
      ln[i] = txt[prevsplit+i];
    ln[newstrsize] = '\n';
    ln[newstrsize+1] = '\0'; // null terminator
    res.push_back(ln);
    splitpoints.clear();
    prevsplit = split;
  }
  // now just need to add the last line
  int lastsplit = prevsplit;
  int newstrsize = txtlen - prevsplit;
  char* ln = new char[newstrsize+1];
  for(int i = 0; i < newstrsize; i++)
    ln[i] = txt[prevsplit+i];
  ln[newstrsize] = '\0';
  res.push_back(ln);
  return res;
}
Пример #12
0
//-----------------------------------------------------------------------------
std::size_t PETScLUSolver::solve(GenericVector& x, const GenericVector& b,
                                 bool transpose)
{
  Timer timer("PETSc LU solver");

  dolfin_assert(_ksp);
  dolfin_assert(_matA);
  PetscErrorCode ierr;

  // Downcast matrix and vectors
  const PETScVector& _b = as_type<const PETScVector>(b);
  PETScVector& _x = as_type<PETScVector>(x);

  // Check dimensions
  if (_matA->size(0) != b.size())
  {
    dolfin_error("PETScLUSolver.cpp",
                 "solve linear system using PETSc LU solver",
                 "Cannot factorize non-square PETSc matrix");
  }

  // Initialize solution vector if required (make compatible with A in
  // parallel)
  if (x.empty())
    _matA->init_vector(x, 1);

  // Set PETSc operators (depends on factorization re-use options);
  //set_petsc_operators();

  // Write a pre-solve message
  pre_report(*_matA);

  // Get package used to solve system
  PC pc;
  ierr = KSPGetPC(_ksp, &pc);
  if (ierr != 0)
    petsc_error(ierr, __FILE__, "KSPGetPC");

  configure_ksp(_solver_package);

  // Set number of threads if using PaStiX
  if (strcmp(_solver_package, MATSOLVERPASTIX) == 0)
  {
    const std::size_t num_threads = parameters["num_threads"].is_set() ?
      parameters["num_threads"] : dolfin::parameters["num_threads"];
    PETScOptions::set("-mat_pastix_threadnbr", num_threads);
  }

  // Solve linear system
  const Vec b_petsc = _b.vec();
  Vec x_petsc = _x.vec();
  if (!transpose)
  {
    ierr = KSPSolve(_ksp, b_petsc, x_petsc);
    if (ierr != 0) petsc_error(ierr, __FILE__, "KSPSolve");
  }
  else
  {
    ierr = KSPSolveTranspose(_ksp, b_petsc, x_petsc);
    if (ierr != 0) petsc_error(ierr, __FILE__, "KSPSolveTranspose");
  }

  // Update ghost values following solve
  _x.update_ghost_values();

  return 1;
}
Пример #13
0
//----------------------------------------------------------------------------
void LocalSolver::solve(GenericVector& x, const Form& a, const Form& L,
                        bool symmetric) const
{
  UFC ufc_a(a);
  UFC ufc_L(L);

  // Set timer
  Timer timer("Local solver");

  // Extract mesh
  const Mesh& mesh = a.mesh();

  // Form ranks
  const std::size_t rank_a = ufc_a.form.rank();
  const std::size_t rank_L = ufc_L.form.rank();

  // Check form ranks
  dolfin_assert(rank_a == 2);
  dolfin_assert(rank_L == 1);

  // Collect pointers to dof maps
  std::shared_ptr<const GenericDofMap> dofmap_a0
    = a.function_space(0)->dofmap();
  std::shared_ptr<const GenericDofMap> dofmap_a1
    = a.function_space(1)->dofmap();
  std::shared_ptr<const GenericDofMap> dofmap_L
    = a.function_space(0)->dofmap();
  dolfin_assert(dofmap_a0);
  dolfin_assert(dofmap_a1);
  dolfin_assert(dofmap_L);

  // Initialise vector
  if (x.empty())
  {
    std::pair<std::size_t, std::size_t> local_range
      = dofmap_L->ownership_range();
    x.init(mesh.mpi_comm(), local_range);
  }

  // Cell integrals
  ufc::cell_integral* integral_a = ufc_a.default_cell_integral.get();
  ufc::cell_integral* integral_L = ufc_L.default_cell_integral.get();

  // Eigen data structures
  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> A;
  Eigen::VectorXd b, x_local;

  // Assemble over cells
  Progress p("Performing local (cell-wise) solve", mesh.num_cells());
  ufc::cell ufc_cell;
  std::vector<double> vertex_coordinates;
  for (CellIterator cell(mesh); !cell.end(); ++cell)
  {
    // Update to current cell
    cell->get_vertex_coordinates(vertex_coordinates);
    cell->get_cell_data(ufc_cell);
    ufc_a.update(*cell, vertex_coordinates, ufc_cell,
                 integral_a->enabled_coefficients());
    ufc_L.update(*cell, vertex_coordinates, ufc_cell,
                 integral_L->enabled_coefficients());

    // Get local-to-global dof maps for cell
    const std::vector<dolfin::la_index>& dofs_a0
      = dofmap_a0->cell_dofs(cell->index());
    const std::vector<dolfin::la_index>& dofs_a1
      = dofmap_a1->cell_dofs(cell->index());
    const std::vector<dolfin::la_index>& dofs_L
      = dofmap_L->cell_dofs(cell->index());

    // Check that local problem is square and a and L match
    dolfin_assert(dofs_a0.size() == dofs_a1.size());
    dolfin_assert(dofs_a1.size() == dofs_L.size());

    // Resize A and b
    A.resize(dofs_a0.size(), dofs_a1.size());
    b.resize(dofs_L.size());

    // Tabulate A and b on cell
    integral_a->tabulate_tensor(A.data(),
                                ufc_a.w(),
                                vertex_coordinates.data(),
                                ufc_cell.orientation);
    integral_L->tabulate_tensor(b.data(),
                                ufc_L.w(),
                                vertex_coordinates.data(),
                                ufc_cell.orientation);

    // Solve local problem
    x_local = A.partialPivLu().solve(b);

    // Set solution in global vector
    x.set(x_local.data(), dofs_a0.size(), dofs_a0.data());

    p++;
  }

  // Finalise vector
  x.apply("insert");
}