// Removes any result that has all its unichars covered by a better choice, // regardless of font. void ShapeClassifier::FilterDuplicateUnichars( GenericVector<ShapeRating>* results) const { GenericVector<ShapeRating> filtered_results; // Copy results to filtered results and knock out duplicate unichars. const ShapeTable* shapes = GetShapeTable(); for (int r = 0; r < results->size(); ++r) { if (r > 0) { const Shape& shape_r = shapes->GetShape((*results)[r].shape_id); int c; for (c = 0; c < shape_r.size(); ++c) { int unichar_id = shape_r[c].unichar_id; int s; for (s = 0; s < r; ++s) { const Shape& shape_s = shapes->GetShape((*results)[s].shape_id); if (shape_s.ContainsUnichar(unichar_id)) break; // We found unichar_id. } if (s == r) break; // We didn't find unichar_id. } if (c == shape_r.size()) continue; // We found all the unichar ids in previous answers. } filtered_results.push_back((*results)[r]); } *results = filtered_results; }
void ShapeClassifier::PrintResults( const char* context, const GenericVector<ShapeRating>& results) const { tprintf("%s\n", context); for (int i = 0; i < results.size(); ++i) { tprintf("%g:", results[i].rating); if (results[i].joined) tprintf("[J]"); if (results[i].broken) tprintf("[B]"); tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string()); } }
// Returns the shape that contains unichar_id that has the best result. // If result is not NULL, it is set with the shape_id and rating. // Does not need to be overridden if ClassifySample respects the keep_this // rule. int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample, Pix* page_pix, UNICHAR_ID unichar_id, ShapeRating* result) { GenericVector<ShapeRating> results; const ShapeTable* shapes = GetShapeTable(); int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); for (int r = 0; r < num_results; ++r) { if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { if (result != NULL) *result = results[r]; return results[r].shape_id; } } return -1; }
// Classifies the given [training] sample, writing to results. // See shapeclassifier.h for a full description. // Default implementation calls the ShapeRating version. int ShapeClassifier::UnicharClassifySample( const TrainingSample& sample, Pix* page_pix, int debug, UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) { results->truncate(0); GenericVector<ShapeRating> shape_results; int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, &shape_results); const ShapeTable* shapes = GetShapeTable(); GenericVector<int> unichar_map; unichar_map.init_to_size(shapes->unicharset().size(), -1); for (int r = 0; r < num_shape_results; ++r) { shapes->AddShapeToResults(shape_results[r], &unichar_map, results); } return results->size(); }
// Displays classification as the given shape_id. Creates as many windows // as it feels fit, using index as a guide for placement. Adds any created // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. int TessClassifier::DisplayClassifyAs( const TrainingSample& sample, Pix* page_pix, int unichar_id, int index, PointerVector<ScrollView>* windows) { int shape_id = unichar_id; if (GetShapeTable() != NULL) shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, NULL); if (shape_id < 0) return index; if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) { tprintf("No built-in templates for class/shape %d\n", shape_id); return index; } classify_->ShowBestMatchFor(shape_id, sample.features(), sample.num_features()); return index; }
// Provides access to the UNICHARSET that this classifier works with. // Only needs to be overridden if GetShapeTable() can return NULL. const UNICHARSET& ShapeClassifier::GetUnicharset() const { return GetShapeTable()->unicharset(); }