Ejemplo n.º 1
0
void fixspace_dbg(WERD_RES *word) {
  TBOX box = word->word->bounding_box ();
  BOOL8 show_map_detail = FALSE;
  inT16 i;

  box.print ();
  #ifndef SECURE_NAMES
  tprintf (" \"%s\" ", word->best_choice->string ().string ());
  tprintf ("Blob count: %d (word); %d/%d (outword)\n",
    word->word->gblob_list ()->length (),
    word->outword->gblob_list ()->length (),
    word->outword->rej_blob_list ()->length ());
  word->reject_map.print (debug_fp);
  tprintf ("\n");
  if (show_map_detail) {
    tprintf ("\"%s\"\n", word->best_choice->string ().string ());
    for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {
      tprintf ("**** \"%c\" ****\n", word->best_choice->string ()[i]);
      word->reject_map[i].full_print (debug_fp);
    }
  }

  tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
  tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
  #endif
}
Ejemplo n.º 2
0
// Internal version of EvaluateBox returns the unclipped gradients as well
// as the result of EvaluateBox.
// hgrad1 and hgrad2 are the gradients for the horizontal textline.
int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
                                            const DENORM* denorm, bool debug,
                                            int* hgrad1, int* hgrad2,
                                            int* vgrad1, int* vgrad2) const {
  int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
                                           box.top(), true);
  int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
                                               box.bottom(), false);
  int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
                                               box.top(), true);
  int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
                                                 box.bottom(), box.top(),
                                                 false);
  int top_clipped = MAX(top_gradient, 0);
  int bottom_clipped = MAX(bottom_gradient, 0);
  int left_clipped = MAX(left_gradient, 0);
  int right_clipped = MAX(right_gradient, 0);
  if (debug) {
    tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
            top_gradient, bottom_gradient, left_gradient, right_gradient);
    box.print();
  }
  int result = MAX(top_clipped, bottom_clipped) -
      MAX(left_clipped, right_clipped);
  if (hgrad1 != NULL && hgrad2 != NULL) {
    *hgrad1 = top_gradient;
    *hgrad2 = bottom_gradient;
  }
  if (vgrad1 != NULL && vgrad2 != NULL) {
    *vgrad1 = left_gradient;
    *vgrad2 = right_gradient;
  }
  return result;
}
Ejemplo n.º 3
0
// Find a set of blobs that are aligned in the given vertical
// direction with the given blob. Returns a list of aligned
// blobs and the number in the list.
// For other parameters see FindAlignedBlob below.
int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
                           bool top_to_bottom, BLOBNBOX* bbox,
                           BLOBNBOX_CLIST* good_points, int* end_y) {
  int ptcount = 0;
  BLOBNBOX_C_IT it(good_points);

  TBOX box = bbox->bounding_box();
  bool debug = WithinTestRegion(2, box.left(), box.bottom());
  if (debug) {
    tprintf("Starting alignment run at blob:");
    box.print();
  }
  int x_start = params.right_tab ? box.right() : box.left();
  while (bbox != nullptr) {
    // Add the blob to the list if the appropriate side is a tab candidate,
    // or if we are working on a ragged tab.
    TabType type = params.right_tab ? bbox->right_tab_type()
                                    : bbox->left_tab_type();
    if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) &&
        (it.empty() || it.data() != bbox)) {
      if (top_to_bottom)
        it.add_before_then_move(bbox);
      else
        it.add_after_then_move(bbox);
      ++ptcount;
    }
    // Find the next blob that is aligned with the current one.
    // FindAlignedBlob guarantees that forward progress will be made in the
    // top_to_bottom direction, and therefore eventually it will return nullptr,
    // making this while (bbox != nullptr) loop safe.
    bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y);
    if (bbox != nullptr) {
      box = bbox->bounding_box();
      if (!params.ragged)
        x_start = params.right_tab ? box.right() : box.left();
    }
  }
  if (debug) {
    tprintf("Alignment run ended with %d pts at blob:", ptcount);
    box.print();
  }
  return ptcount;
}
Ejemplo n.º 4
0
/// Consume all source blobs that strongly overlap the given box,
/// putting them into a new word, with the correct_text label.
/// Fights over which box owns which blobs are settled by
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an overlapping blob for a box.
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
                                 const TBOX& box, const TBOX& next_box,
                                 const char* correct_text) {
  if (applybox_debug > 1) {
    tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text);
  }
  WERD* new_word = NULL;
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    if (!box.major_overlap(block->bounding_box()))
      continue;
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
      ROW* row = r_it.data();
      if (!box.major_overlap(row->bounding_box()))
        continue;
      WERD_IT w_it(row->word_list());
      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
        WERD* word = w_it.data();
        if (applybox_debug > 2) {
          tprintf("Checking word:");
          word->bounding_box().print();
        }
        if (word->text() != NULL && word->text()[0] != '\0')
          continue;  // Ignore words that are already done.
        if (!box.major_overlap(word->bounding_box()))
          continue;
        C_BLOB_IT blob_it(word->cblob_list());
        for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
             blob_it.forward()) {
          C_BLOB* blob = blob_it.data();
          TBOX blob_box = blob->bounding_box();
          if (!blob_box.major_overlap(box))
            continue;
          double current_box_miss_metric = BoxMissMetric(blob_box, box);
          double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
          if (applybox_debug > 2) {
            tprintf("Checking blob:");
            blob_box.print();
            tprintf("Current miss metric = %g, next = %g\n",
                    current_box_miss_metric, next_box_miss_metric);
          }
          if (current_box_miss_metric > next_box_miss_metric)
            continue;  // Blob is a better match for next box.
          if (applybox_debug > 2) {
            tprintf("Blob match: blob:");
            blob_box.print();
            tprintf("Matches box:");
            box.print();
            tprintf("With next box:");
            next_box.print();
          }
          if (new_word == NULL) {
            // Make a new word with a single blob.
            new_word = word->shallow_copy();
            new_word->set_text(correct_text);
            w_it.add_to_end(new_word);
          }
          C_BLOB_IT new_blob_it(new_word->cblob_list());
          new_blob_it.add_to_end(blob_it.extract());
        }
      }
    }
  }
  if (new_word == NULL && applybox_debug > 0) tprintf("FAIL!\n");
  return new_word != NULL;
}
Ejemplo n.º 5
0
/// Gather consecutive blobs that match the given box into the best_state
/// and corresponding correct_text.
///
/// Fights over which box owns which blobs are settled by pre-chopping and
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an appropriate blob for a box.
///
/// This means that occasionally, blobs may be incorrectly segmented if the
/// chopper fails to find a suitable chop point.
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
                                 const TBOX& box, const TBOX& next_box,
                                 const char* correct_text) {
  if (applybox_debug > 1) {
    tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text);
  }
  PAGE_RES_IT page_res_it(page_res);
  WERD_RES* word_res;
  for (word_res = page_res_it.word(); word_res != NULL;
       word_res = page_res_it.forward()) {
    if (!word_res->box_word->bounding_box().major_overlap(box))
      continue;
    if (applybox_debug > 1) {
      tprintf("Checking word box:");
      word_res->box_word->bounding_box().print();
    }
    int word_len = word_res->box_word->length();
    for (int i = 0; i < word_len; ++i) {
      TBOX char_box = TBOX();
      int blob_count = 0;
      for (blob_count = 0; i + blob_count < word_len; ++blob_count) {
        TBOX blob_box = word_res->box_word->BlobBox(i + blob_count);
        if (!blob_box.major_overlap(box))
          break;
        if (word_res->correct_text[i + blob_count].length() > 0)
          break;  // Blob is claimed already.
        double current_box_miss_metric = BoxMissMetric(blob_box, box);
        double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
        if (applybox_debug > 2) {
          tprintf("Checking blob:");
          blob_box.print();
          tprintf("Current miss metric = %g, next = %g\n",
                  current_box_miss_metric, next_box_miss_metric);
        }
        if (current_box_miss_metric > next_box_miss_metric)
          break;  // Blob is a better match for next box.
        char_box += blob_box;
      }
      if (blob_count > 0) {
        if (applybox_debug > 1) {
          tprintf("Index [%d, %d) seem good.\n", i, i + blob_count);
        }
        if (!char_box.almost_equal(box, 3) &&
            (box.x_gap(next_box) < -3 ||
             (prev_box != NULL && prev_box->x_gap(box) < -3))) {
          return false;
        }
        // We refine just the box_word, best_state and correct_text here.
        // The rebuild_word is made in TidyUp.
        // blob_count blobs are put together to match the box. Merge the
        // box_word boxes, save the blob_count in the state and the text.
        word_res->box_word->MergeBoxes(i, i + blob_count);
        word_res->best_state[i] = blob_count;
        word_res->correct_text[i] = correct_text;
        if (applybox_debug > 2) {
          tprintf("%d Blobs match: blob box:", blob_count);
          word_res->box_word->BlobBox(i).print();
          tprintf("Matches box:");
          box.print();
          tprintf("With next box:");
          next_box.print();
        }
        // Eliminated best_state and correct_text entries for the consumed
        // blobs.
        for (int j = 1; j < blob_count; ++j) {
          word_res->best_state.remove(i + 1);
          word_res->correct_text.remove(i + 1);
        }
        // Assume that no box spans multiple source words, so we are done with
        // this box.
        if (applybox_debug > 1) {
          tprintf("Best state = ");
          for (int j = 0; j < word_res->best_state.size(); ++j) {
            tprintf("%d ", word_res->best_state[j]);
          }
          tprintf("\n");
          tprintf("Correct text = [[ ");
          for (int j = 0; j < word_res->correct_text.size(); ++j) {
            tprintf("%s ", word_res->correct_text[j].string());
          }
          tprintf("]]\n");
        }
        return true;
      }
    }
  }
  if (applybox_debug > 0) {
    tprintf("FAIL!\n");
  }
  return false;  // Failure.
}