Esempio n. 1
0
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
//
// results is always cleared first. If page_pix is NULL nothing is classified
// and 0 is returned. Otherwise the sample's bounding box is recognized by
// cube on page_pix, and every cube alternative that maps to a shape in
// shape_table_ is appended to results as a ShapeRating carrying the
// alternative's probability.
// The debug and keep_this arguments are not used by this implementation.
// Returns the number of entries in results.
int CubeClassifier::ClassifySample(const TrainingSample& sample,
                                   Pix* page_pix, int debug, int keep_this,
                                   GenericVector<ShapeRating>* results) {
  results->clear();
  if (page_pix == NULL) return 0;

  ASSERT_HOST(cube_cntxt_ != NULL);
  const TBOX& char_box = sample.bounding_box();
  // NOTE(review): the vertical coordinate is flipped against the image height,
  // presumably because TBOX is bottom-up while the image is top-down - confirm.
  CubeObject* cube_obj = new tesseract::CubeObject(
      cube_cntxt_, page_pix, char_box.left(),
      pixGetHeight(page_pix) - char_box.top(),
      char_box.width(), char_box.height());
  CharAltList* alt_list = cube_obj->RecognizeChar();
  CharSet* char_set = cube_cntxt_->CharacterSet();
  // RecognizeChar may yield NULL, so the list must not be touched (not even
  // sorted) until it has been checked.
  if (alt_list != NULL) {
    alt_list->Sort();
    for (int i = 0; i < alt_list->AltCount(); ++i) {
      // Convert cube representation to a shape_id.
      int alt_id = alt_list->Alt(i);
      int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
      int shape_id = shape_table_.FindShape(unichar_id, -1);
      // Alternatives without a matching shape (negative id) are dropped.
      if (shape_id >= 0)
        results->push_back(ShapeRating(shape_id, alt_list->AltProb(i)));
    }
    delete alt_list;
  }
  delete cube_obj;
  return results->size();
}
Esempio n. 2
0
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
int CubeTessClassifier::ClassifySample(const TrainingSample& sample,
                                       Pix* page_pix, int debug, int keep_this,
                                       GenericVector<ShapeRating>* results) {
  int num_results = pruner_->ClassifySample(sample, page_pix, debug, keep_this,
                                            results);
  if (page_pix == NULL) return num_results;

  ASSERT_HOST(cube_cntxt_ != NULL);
  const TBOX& char_box = sample.bounding_box();
  CubeObject* cube_obj = new tesseract::CubeObject(
      cube_cntxt_, page_pix, char_box.left(),
      pixGetHeight(page_pix) - char_box.top(),
      char_box.width(), char_box.height());
  CharAltList* alt_list = cube_obj->RecognizeChar();
  CharSet* char_set = cube_cntxt_->CharacterSet();
  if (alt_list != NULL) {
    for (int r = 0; r < num_results; ++r) {
      const Shape& shape = shape_table_.GetShape((*results)[r].shape_id);
      // Get the best cube probability of all unichars in the shape.
      double best_prob = 0.0;
      for (int i = 0; i < alt_list->AltCount(); ++i) {
        int alt_id = alt_list->Alt(i);
        int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
        if (shape.ContainsUnichar(unichar_id) &&
            alt_list->AltProb(i) > best_prob) {
          best_prob = alt_list->AltProb(i);
        }
      }
      (*results)[r].rating = best_prob;
    }
    delete alt_list;
    // Re-sort by rating.
    results->sort(&ShapeRating::SortDescendingRating);
  }
  delete cube_obj;
  return results->size();
}
// Builds a debug description of the given sample: the name of its font,
// a single space, and then the box-file string that MakeBoxFileStr produces
// from the sample's unichar, bounding box and page number.
    STRING TrainingSampleSet::SampleToString(const TrainingSample &sample) const {
        // Box-file part: unichar string, bounding box and page number.
        STRING box_description;
        MakeBoxFileStr(unicharset_.id_to_unichar(sample.class_id()),
                       sample.bounding_box(), sample.page_num(),
                       &box_description);
        // Font part: looked up through the sample's font id.
        STRING font_name(fontinfo_table_.get(sample.font_id()).name);
        return font_name + " " + box_description;
    }