// Classifies the given [training] sample, writing to results. // See ShapeClassifier for a full description. int CubeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, int debug, int keep_this, GenericVector<ShapeRating>* results) { results->clear(); if (page_pix == NULL) return 0; ASSERT_HOST(cube_cntxt_ != NULL); const TBOX& char_box = sample.bounding_box(); CubeObject* cube_obj = new tesseract::CubeObject( cube_cntxt_, page_pix, char_box.left(), pixGetHeight(page_pix) - char_box.top(), char_box.width(), char_box.height()); CharAltList* alt_list = cube_obj->RecognizeChar(); alt_list->Sort(); CharSet* char_set = cube_cntxt_->CharacterSet(); if (alt_list != NULL) { for (int i = 0; i < alt_list->AltCount(); ++i) { // Convert cube representation to a shape_id. int alt_id = alt_list->Alt(i); int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id)); int shape_id = shape_table_.FindShape(unichar_id, -1); if (shape_id >= 0) results->push_back(ShapeRating(shape_id, alt_list->AltProb(i))); } delete alt_list; } delete cube_obj; return results->size(); }
// Classifies the given [training] sample, writing to results. // See ShapeClassifier for a full description. int CubeTessClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, int debug, int keep_this, GenericVector<ShapeRating>* results) { int num_results = pruner_->ClassifySample(sample, page_pix, debug, keep_this, results); if (page_pix == NULL) return num_results; ASSERT_HOST(cube_cntxt_ != NULL); const TBOX& char_box = sample.bounding_box(); CubeObject* cube_obj = new tesseract::CubeObject( cube_cntxt_, page_pix, char_box.left(), pixGetHeight(page_pix) - char_box.top(), char_box.width(), char_box.height()); CharAltList* alt_list = cube_obj->RecognizeChar(); CharSet* char_set = cube_cntxt_->CharacterSet(); if (alt_list != NULL) { for (int r = 0; r < num_results; ++r) { const Shape& shape = shape_table_.GetShape((*results)[r].shape_id); // Get the best cube probability of all unichars in the shape. double best_prob = 0.0; for (int i = 0; i < alt_list->AltCount(); ++i) { int alt_id = alt_list->Alt(i); int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id)); if (shape.ContainsUnichar(unichar_id) && alt_list->AltProb(i) > best_prob) { best_prob = alt_list->AltProb(i); } } (*results)[r].rating = best_prob; } delete alt_list; // Re-sort by rating. results->sort(&ShapeRating::SortDescendingRating); } delete cube_obj; return results->size(); }
// Returns a string debug representation of the given sample: // font, unichar_str, bounding box, page. STRING TrainingSampleSet::SampleToString(const TrainingSample &sample) const { STRING boxfile_str; MakeBoxFileStr(unicharset_.id_to_unichar(sample.class_id()), sample.bounding_box(), sample.page_num(), &boxfile_str); return STRING(fontinfo_table_.get(sample.font_id()).name) + " " + boxfile_str; }