bool Dict::absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset) { if (word.length() < kMinAbsoluteGarbageWordLength) return false; int num_alphanum = 0; for (int x = 0; x < word.length(); ++x) { num_alphanum += (unicharset.get_isalpha(word.unichar_id(x)) || unicharset.get_isdigit(word.unichar_id(x))); } return (static_cast<float>(num_alphanum) / static_cast<float>(word.length()) < kMinAbsoluteGarbageAlphanumFrac); }
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) { int state = 0; int x; for (x = 0; x < word.length(); ++x) { UNICHAR_ID ch_id = word.unichar_id(x); if (unicharset.get_isupper(ch_id)) state = case_state_table[state][1]; else if (unicharset.get_islower(ch_id)) state = case_state_table[state][2]; else if (unicharset.get_isdigit(ch_id)) state = case_state_table[state][3]; else state = case_state_table[state][0]; if (state == -1) return false; } return state != 5; // single lower is bad }
/**
 * Tests whether a unichar id is a digit other than "0".
 *
 * @param ch_set      Character set used to classify and compare the id.
 * @param unichar_id  Id of the character to test.
 * @return true when ch_set classifies unichar_id as a digit and
 *         ch_set.eq(unichar_id, "0") is false; false otherwise.
 */
BOOL8 Tesseract::non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) {
  // Non-digits are rejected before the comparison against "0" is made.
  if (!ch_set.get_isdigit(unichar_id)) {
    return false;
  }
  return !ch_set.eq(unichar_id, "0");
}