Esempio n. 1
0
/// Resegments the word to achieve the target_text from the classifier.
/// Returns false if the re-segmentation fails.
/// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and
/// applies a full search on the classifier results to find the best classified
/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
/// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
                                 WERD_RES* word_res) {
  // Classify all required combinations of blobs and save results in choices.
  int word_length = word_res->box_word->length();
  GenericVector<BLOB_CHOICE_LIST*>* choices =
      new GenericVector<BLOB_CHOICE_LIST*>[word_length];
  for (int i = 0; i < word_length; ++i) {
    for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) {
      BLOB_CHOICE_LIST* match_result = classify_piece(
          word_res->seam_array, i, i + j - 1, "Applybox",
          word_res->chopped_word, word_res->blamer_bundle);
      if (applybox_debug > 2) {
        tprintf("%d+%d:", i, j);
        print_ratings_list("Segment:", match_result, unicharset);
      }
      choices[i].push_back(match_result);
    }
  }
  // Search the segmentation graph for the target text. Must be an exact
  // match. Using wildcards makes it difficult to find the correct
  // segmentation even when it is there.
  word_res->best_state.clear();
  GenericVector<int> search_segmentation;
  float best_rating = 0.0f;
  SearchForText(choices, 0, word_length, target_text, 0, 0.0f,
                &search_segmentation, &best_rating, &word_res->best_state);
  for (int i = 0; i < word_length; ++i)
    choices[i].delete_data_pointers();
  delete [] choices;
  if (word_res->best_state.empty()) {
    // Build the original segmentation and if it is the same length as the
    // truth, assume it will do.
    int blob_count = 1;
    for (int s = 0; s < word_res->seam_array.size(); ++s) {
      SEAM* seam = word_res->seam_array[s];
      if (!seam->HasAnySplits()) {
        word_res->best_state.push_back(blob_count);
        blob_count = 1;
      } else {
        ++blob_count;
      }
    }
    word_res->best_state.push_back(blob_count);
    if (word_res->best_state.size() != target_text.size()) {
      word_res->best_state.clear();  // No good. Original segmentation bad size.
      return false;
    }
  }
  word_res->correct_text.clear();
  for (int i = 0; i < target_text.size(); ++i) {
    word_res->correct_text.push_back(
        STRING(unicharset.id_to_unichar(target_text[i])));
  }
  return true;
}
Esempio n. 2
0
/**
 * @name attempt_blob_chop
 *
 * Try to split this blob in two, with the new piece placed in the word
 * directly after it (at index blob_number + 1), and check to make sure that
 * the split was successful.
 *
 * A seam comes either from dividing the blob at a location reported by
 * divisible_blob() or from pick_good_seam(); which one is tried first
 * depends on prioritize_division and allow_blob_division.
 *
 * @return the finalized seam that was applied, or nullptr if no seam
 * survived CheckSeam().
 */
SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number,
                                 bool italic_blob,
                                 const GenericVector<SEAM*>& seams) {
  if (repair_unchopped_blobs) {
    preserve_outline_tree(blob->outlines);
  }

  // Make the new blob and insert it into the word right after the original.
  TBLOB *other_blob = TBLOB::ShallowCopy(*blob);
  word->blobs.insert(other_blob, blob_number + 1);

  // When division is prioritized, try a plain division first.
  SEAM *seam = nullptr;
  if (prioritize_division) {
    TPOINT location;
    if (divisible_blob(blob, italic_blob, &location)) {
      seam = new SEAM(0.0f, location);
    }
  }
  if (!seam) {
    seam = pick_good_seam(blob);
  }

  if (seam) {
    if (chop_debug) {
      seam->Print("Good seam picked=");
    }
    seam->ApplySeam(italic_blob, blob, other_blob);
  } else if (chop_debug) {
    tprintf("\n** no seam picked *** \n");
  }

  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
                   seams, seam);
  if (!seam) {
    if (repair_unchopped_blobs) {
      restore_outline_tree(blob->outlines);
    }
    if (allow_blob_division && !prioritize_division) {
      // If the blob can simply be divided into outlines, then do that.
      TPOINT location;
      if (divisible_blob(blob, italic_blob, &location)) {
        // A fresh copy is inserted again here.
        // NOTE(review): presumably CheckSeam() removed the earlier copy from
        // the word when it rejected the seam -- confirm.
        other_blob = TBLOB::ShallowCopy(*blob);
        word->blobs.insert(other_blob, blob_number + 1);
        seam = new SEAM(0.0f, location);
        seam->ApplySeam(italic_blob, blob, other_blob);
        seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
                         seams, seam);
      }
    }
  }

  if (seam) {
    // Make sure this seam doesn't get chopped again.
    seam->Finalize();
  }
  return seam;
}
Esempio n. 3
0
/**********************************************************************
 * pick_good_seam
 *
 * Find and return a good seam that will split this blob into two pieces,
 * working from the outlines of the blob. Returns NULL when no seam was
 * found or when the best one has a priority above chop_ok_split.
 **********************************************************************/
SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
  SeamPile seam_pile(chop_seam_pile_size);
  EDGEPT *points[MAX_NUM_POINTS];
  EDGEPT_CLIST new_points;
  SEAM *seam = NULL;
  inT16 num_points = 0;

#ifndef GRAPHICS_DISABLED
  if (chop_debug > 2) {
    wordrec_display_splits.set_value(true);
  }
  draw_blob_edges(blob);
#endif

  // Feed the points of every outline to the heap, then drain the heap into
  // the points array, taking at most MAX_NUM_POINTS of them.
  PointHeap point_heap(MAX_NUM_POINTS);
  for (TESSLINE *outline = blob->outlines; outline != NULL;
       outline = outline->next) {
    prioritize_points(outline, &point_heap);
  }
  for (; num_points < MAX_NUM_POINTS && !point_heap.empty(); ++num_points) {
    points[num_points] = point_heap.PeekTop().data;
    point_heap.Pop(NULL);
  }

  /* Initialize queue */
  SeamQueue seam_queue(MAX_NUM_SEAMS);

  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
  try_vertical_splits(points, num_points, &new_points,
                      &seam_queue, &seam_pile, &seam, blob);

  // No more splits to offer: let choose_best_seam work through the queue
  // unless a good enough seam is already in hand.
  if (seam != NULL) {
    if (seam->priority() > chop_good_split) {
      choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob,
                       &seam_pile);
    }
  } else {
    choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
  }

  // Remove every point recorded in new_points that the chosen seam does not
  // use, first moving any outline's loop pointer off the point being removed.
  EDGEPT_C_IT it(&new_points);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    EDGEPT *inserted_point = it.data();
    if (seam != NULL && seam->UsesPoint(inserted_point)) {
      continue;  // Still needed by the seam.
    }
    for (TESSLINE *outline = blob->outlines; outline != NULL;
         outline = outline->next) {
      if (outline->loop == inserted_point) {
        outline->loop = inserted_point->next;
      }
    }
    remove_edgept(inserted_point);
  }

  if (seam != NULL) {
    if (seam->priority() > chop_ok_split) {
      // Not even an acceptable split: discard it.
      delete seam;
      seam = NULL;
    }
#ifndef GRAPHICS_DISABLED
    else if (wordrec_display_splits) {
      seam->Mark(edge_window);
      if (chop_debug > 2) {
        update_edge_window();
        edge_window_wait();
      }
    }
#endif
  }

  if (chop_debug) {
    wordrec_display_splits.set_value(false);
  }

  return seam;
}
Esempio n. 4
0
/**********************************************************************
 * choose_best_seam
 *
 * Choose the best seam that can be created by assembling a collection
 * of splits.  A queue of all the possible seams is maintained.  Each
 * new split received is placed in that queue with its partial priority
 * value.  These values in the seam queue are evaluated and combined
 * until a good enough seam is found.  If no further good seams are
 * being found then this function returns to the caller, who will send
 * more splits.  If this function is called with a split of NULL, then
 * no further splits can be supplied by the caller.
 *
 * seam_queue   Queue of candidate seams; entries are popped and
 *              evaluated here.
 * split        New split to wrap in a seam and enqueue, or NULL.
 * priority     Partial priority given to the seam built from split.
 * seam_result  In/out: best seam so far (may point to NULL).  It is
 *              replaced by a copy of any healthy seam whose full
 *              priority is lower and below chop_ok_split.
 * blob         Blob being chopped; supplies the bounding box and is
 *              used for the health check.
 * seam_pile    Pile of already evaluated seams that new seams are
 *              combined with; holds at most chop_seam_pile_size.
 **********************************************************************/
void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
                               PRIORITY priority, SEAM **seam_result,
                               TBLOB *blob, SeamPile *seam_pile) {
  SEAM *seam;
  float my_priority;
  /* Add seam of split */
  my_priority = priority;
  if (split != NULL) {
    // The seam location is the midpoint of the two split points.
    TPOINT split_point = split->point1->pos;
    split_point += split->point2->pos;
    split_point /= 2;
    seam = new SEAM(my_priority, split_point, *split);
    if (chop_debug > 1) seam->Print("Partial priority    ");
    add_seam_to_queue(my_priority, seam, seam_queue);

    // Not promising enough to evaluate now; wait for more splits.
    if (my_priority > chop_good_split)
      return;
  }

  TBOX bbox = blob->bounding_box();
  /* Queue loop */
  while (!seam_queue->empty()) {
    SeamPair seam_pair;
    seam_queue->Pop(&seam_pair);
    // From here on this function either deletes the seam or passes it on
    // to the seam pile.
    seam = seam_pair.extract_data();
    /* Set full priority */
    my_priority = seam->FullPriority(bbox.left(), bbox.right(),
                                     chop_overlap_knob, chop_centered_maxwidth,
                                     chop_center_knob, chop_width_change_knob);
    if (chop_debug) {
      // Bounded formatting: the write can never run past the buffer.
      char str[80];
      snprintf(str, sizeof(str), "Full my_priority %0.0f,  ", my_priority);
      seam->Print(str);
    }

    if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) &&
        my_priority < chop_ok_split) {
      /* No crossing */
      if (seam->IsHealthy(*blob, chop_min_outline_points,
                          chop_min_outline_area)) {
        // Better than the current result: keep a copy carrying the full
        // priority.
        delete *seam_result;
        *seam_result = new SEAM(*seam);
        (*seam_result)->set_priority(my_priority);
      } else {
        // Unhealthy seams are dropped and must not count as a good answer.
        delete seam;
        seam = NULL;
        my_priority = BAD_PRIORITY;
      }
    }

    if (my_priority < chop_good_split) {
      delete seam;               /* Deleting NULL is a no-op */
      return;                    /* Made good answer */
    }

    if (seam) {
      /* Combine with others */
      if (seam_pile->size() < chop_seam_pile_size) {
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair pair(seam_pair.key(), seam);
        seam_pile->Push(&pair);
      } else if (chop_new_seam_pile &&
                 seam_pile->size() == chop_seam_pile_size &&
                 seam_pile->PeekTop().key() > seam_pair.key()) {
        combine_seam(*seam_pile, seam, seam_queue);
        SeamDecPair pair;
        seam_pile->Pop(&pair);  // pop the worst.
        // Replace the seam in pair (deleting the old one) with
        // the new seam and score, then push back into the heap.
        pair.set_key(seam_pair.key());
        pair.set_data(seam);
        seam_pile->Push(&pair);
      } else {
        // The pile is full and this seam does not displace anything.
        delete seam;
      }
    }

    // Stop when the best remaining candidate is not acceptable, or when it
    // is merely not good and the caller can still supply more splits.
    my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
                                      : seam_queue->PeekTop().key();
    if ((my_priority > chop_ok_split) ||
      (my_priority > chop_good_split && split))
      return;
  }
}