C++ (Cpp) UNICHARSET::contains_unichar Exemples

Langage de programmation: C++ (Cpp)

Class/Type: UNICHARSET

Méthode/Fonction: contains_unichar

Exemples au hotexamples.com: 2

C++ (Cpp) UNICHARSET::contains_unichar - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de UNICHARSET::contains_unichar extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

id_to_unichar(12)

unichar_to_id(6)

load_from_file(6)

get_isupper(4)

size(4)

get_script_from_script_id(4)

get_top_bottom(3)

get_script_table_size(3)

save_to_file(3)

get_isdigit(3)

get_islower(3)

contains_unichar(2)

debug_str(2)

eq(2)

encode_string(2)

GetStrProperties(2)

get_script_id_from_name(1)

PropertiesIncomplete(1)

get_other_case(1)

get_normed_unichar(1)

encodable_string(1)

null_sid(1)

get_isalpha(1)

script_has_upper_lower(1)

SetPropertiesFromOther(1)

step(1)

top_bottom_useful(1)

unichar_insert(1)

Méthodes fréquemment utilisées

id_to_unichar (12)

unichar_to_id (6)

load_from_file (6)

get_isupper (4)

size (4)

get_script_from_script_id (4)

get_top_bottom (3)

get_script_table_size (3)

save_to_file (3)

get_isdigit (3)

Méthodes fréquemment utilisées

get_islower (3)

contains_unichar (2)

debug_str (2)

eq (2)

encode_string (2)

GetStrProperties (2)

get_script_id_from_name (1)

PropertiesIncomplete (1)

get_other_case (1)

get_normed_unichar (1)

encodable_string (1)

null_sid (1)

get_isalpha (1)

script_has_upper_lower (1)

SetPropertiesFromOther (1)

step (1)

top_bottom_useful (1)

unichar_insert (1)

Méthodes fréquemment utilisées

encodable_string (1)

null_sid (1)

get_isalpha (1)

script_has_upper_lower (1)

SetPropertiesFromOther (1)

step (1)

top_bottom_useful (1)

unichar_insert (1)

Exemple #1

0

Afficher le fichier

Fichier : ambigs.cpp Projet : Kailigithub/tesseract

bool UnicharAmbigs::ParseAmbiguityLine( int line_num, int version, int debug_level, const UNICHARSET &unicharset, char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids, int *replacement_ambig_part_size, char *replacement_string, int *type) { if (version > 1) { // Simpler format is just wrong-string correct-string type\n. STRING input(buffer); GenericVector<STRING> fields; input.split(' ', &fields); if (fields.size() != 3) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } // Encode wrong-string. GenericVector<UNICHAR_ID> unichars; if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL, NULL)) { return false; } *test_ambig_part_size = unichars.size(); if (*test_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } // Copy encoded string to output. for (int i = 0; i < unichars.size(); ++i) test_unichar_ids[i] = unichars[i]; test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID; // Encode replacement-string to check validity. if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL, NULL)) { return false; } *replacement_ambig_part_size = unichars.size(); if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } if (sscanf(fields[2].string(), "%d", type) != 1) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string()); return true; } int i; char *token; char *next_token; if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", test_ambig_part_size) || *test_ambig_part_size <= 0) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*test_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } for (i = 0; i < *test_ambig_part_size; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; if (!unicharset.contains_unichar(token)) { if (debug_level) tprintf(kIllegalUnicharMsg, token); break; } test_unichar_ids[i] = unicharset.unichar_to_id(token); } test_unichar_ids[i] = INVALID_UNICHAR_ID; if (i != *test_ambig_part_size || !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", replacement_ambig_part_size) || *replacement_ambig_part_size <= 0) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } replacement_string[0] = '\0'; for (i = 0; i < *replacement_ambig_part_size; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; strcat(replacement_string, token); if (!unicharset.contains_unichar(token)) { if (debug_level) tprintf(kIllegalUnicharMsg, token); break; } } if (i != *replacement_ambig_part_size) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (version > 0) { // The next field being true indicates that the abiguity should // always be substituted (e.g. '' should always be changed to "). // For such "certain" n -> m ambigs tesseract will insert character // fragments for the n pieces in the unicharset. AmbigsFound() // will then replace the incorrect ngram with the character // fragments of the correct character (or ngram if m > 1). // Note that if m > 1, an ngram will be inserted into the // modified word, not the individual unigrams. Tesseract // has limited support for ngram unichar (e.g. dawg permuter). if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", type)) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } } return true; }

Exemple #2

0

Afficher le fichier

Fichier : ambigs.cpp Projet : Appiah/tesseractstuff

bool UnicharAmbigs::ParseAmbiguityLine( int line_num, int version, const UNICHARSET &unicharset, char *buffer, int *TestAmbigPartSize, UNICHAR_ID *TestUnicharIds, int *ReplacementAmbigPartSize, char *ReplacementString, int *type) { int i; char *token; char *next_token; if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", TestAmbigPartSize) || TestAmbigPartSize <= 0) { if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*TestAmbigPartSize > MAX_AMBIG_SIZE) { tprintf("Too many unichars in ambiguity on line %d\n"); return false; } for (i = 0; i < *TestAmbigPartSize; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; if (!unicharset.contains_unichar(token)) { if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token); break; } TestUnicharIds[i] = unicharset.unichar_to_id(token); } TestUnicharIds[i] = INVALID_UNICHAR_ID; if (i != *TestAmbigPartSize || !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", ReplacementAmbigPartSize) || *ReplacementAmbigPartSize <= 0) { if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*ReplacementAmbigPartSize > MAX_AMBIG_SIZE) { tprintf("Too many unichars in ambiguity on line %d\n"); return false; } ReplacementString[0] = '\0'; for (i = 0; i < *ReplacementAmbigPartSize; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; strcat(ReplacementString, token); if (!unicharset.contains_unichar(token)) { if (global_ambigs_debug_level) tprintf(kIllegalUnicharMsg, token); break; } } if (i != *ReplacementAmbigPartSize) { if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); return false; } if (version > 0) { // The next field being true indicates that the abiguity should // always be substituted (e.g. '' should always be changed to "). // For such "certain" n -> m ambigs tesseract will insert character // fragments for the n pieces in the unicharset. AmbigsFound() // will then replace the incorrect ngram with the character // fragments of the correct character (or ngram if m > 1). // Note that if m > 1, an ngram will be inserted into the // modified word, not the individual unigrams. Tesseract // has limited support for ngram unichar (e.g. dawg permuter). if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", type)) { if (global_ambigs_debug_level) tprintf(kIllegalMsg, line_num); return false; } } return true; }