예제 #1
0
// Drops every entry of *results whose shape contributes no unichar that is
// not already present in the shape of some earlier entry of the list; fonts
// play no part in the comparison.
// The first entry is always kept. Coverage is tested against all earlier
// entries of the incoming list, including ones that were themselves dropped.
// An entry after the first whose shape holds no unichars at all is dropped.
// On return *results holds only the surviving entries, in their original
// order.
void ShapeClassifier::FilterDuplicateUnichars(
    GenericVector<ShapeRating>* results) const {
  const ShapeTable* shape_table = GetShapeTable();
  GenericVector<ShapeRating> kept;
  for (int cur = 0; cur < results->size(); ++cur) {
    // Only entries after the first can be covered by something earlier.
    bool all_covered = cur > 0;
    if (all_covered) {
      const Shape& candidate =
          shape_table->GetShape((*results)[cur].shape_id);
      // Stop at the first unichar that no earlier shape contains.
      for (int u = 0; u < candidate.size() && all_covered; ++u) {
        const int unichar_id = candidate[u].unichar_id;
        bool seen_before = false;
        for (int prev = 0; prev < cur && !seen_before; ++prev) {
          const Shape& earlier =
              shape_table->GetShape((*results)[prev].shape_id);
          if (earlier.ContainsUnichar(unichar_id))
            seen_before = true;
        }
        all_covered = seen_before;
      }
    }
    if (!all_covered)
      kept.push_back((*results)[cur]);
  }
  *results = kept;
}
예제 #2
0
void ShapeClassifier::PrintResults(
    const char* context, const GenericVector<ShapeRating>& results) const {
  tprintf("%s\n", context);
  for (int i = 0; i < results.size(); ++i) {
    tprintf("%g:", results[i].rating);
    if (results[i].joined)
      tprintf("[J]");
    if (results[i].broken)
      tprintf("[B]");
    tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string());
  }
}
예제 #3
0
// Returns the shape that contains unichar_id that has the best result.
// If result is not NULL, it is set with the shape_id and rating.
// Does not need to be overridden if ClassifySample respects the keep_this
// rule.
int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample,
                                         Pix* page_pix, UNICHAR_ID unichar_id,
                                         ShapeRating* result) {
  GenericVector<ShapeRating> results;
  const ShapeTable* shapes = GetShapeTable();
  int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results);
  for (int r = 0; r < num_results; ++r) {
    if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) {
      if (result != NULL)
        *result = results[r];
      return results[r].shape_id;
    }
  }
  return -1;
}
예제 #4
0
// Classifies the given [training] sample, writing to results.
// See shapeclassifier.h for a full description.
// Default implementation calls the ShapeRating version.
int ShapeClassifier::UnicharClassifySample(
    const TrainingSample& sample, Pix* page_pix, int debug,
    UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
  results->truncate(0);
  GenericVector<ShapeRating> shape_results;
  int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this,
                                         &shape_results);
  const ShapeTable* shapes = GetShapeTable();
  GenericVector<int> unichar_map;
  unichar_map.init_to_size(shapes->unicharset().size(), -1);
  for (int r = 0; r < num_shape_results; ++r) {
    shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
  }
  return results->size();
}
예제 #5
0
// Displays the classification of the sample as the given unichar_id.
// When a shape table is available, unichar_id is first mapped to a shape via
// BestShapeForUnichar; otherwise unichar_id itself is used as the class id.
// If that id is negative nothing is shown. If the pre-trained templates have
// no entry for the id a message is printed; otherwise the best match for it
// is shown through classify_->ShowBestMatchFor.
// This implementation does not add anything to windows and always returns
// index unchanged.
int TessClassifier::DisplayClassifyAs(
    const TrainingSample& sample, Pix* page_pix, int unichar_id, int index,
    PointerVector<ScrollView>* windows) {
  const int shape_id =
      (GetShapeTable() != NULL)
          ? BestShapeForUnichar(sample, page_pix, unichar_id, NULL)
          : unichar_id;
  if (shape_id >= 0) {
    if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) {
      tprintf("No built-in templates for class/shape %d\n", shape_id);
    } else {
      classify_->ShowBestMatchFor(shape_id, sample.features(),
                                  sample.num_features());
    }
  }
  return index;
}
예제 #6
0
// Returns the UNICHARSET used by this classifier, taken from its shape table.
// The shape table pointer is dereferenced without a check, so a subclass
// whose GetShapeTable() can return NULL has to override this method.
const UNICHARSET& ShapeClassifier::GetUnicharset() const {
  const ShapeTable* shape_table = GetShapeTable();
  return shape_table->unicharset();
}