/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) { PAGE_RES_IT pr_it(page_res); for (WERD_RES *word_res = pr_it.word(); word_res != NULL; word_res = pr_it.forward()) { WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set, word_res->correct_text.size()); for (int i = 0; i < word_res->correct_text.size(); ++i) { // The part before the first space is the real ground truth, and the // rest is the bounding box location and page number. GenericVector<STRING> tokens; word_res->correct_text[i].split(' ', &tokens); UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string()); choice->append_unichar_id_space_allocated(char_id, word_res->best_state[i], 0.0f, 0.0f); } word_res->ClearWordChoices(); word_res->LogNewRawChoice(choice); word_res->LogNewCookedChoice(1, false, choice); } }