Exemplos de UNICHARSET::encode_string em C++ (Cpp)

Linguagem de programação: C++ (Cpp)

Classe / Tipo: UNICHARSET

Método / Função: encode_string

Exemplos em hotexamples.com: 2

UNICHARSET::encode_string em C++ (Cpp) - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de UNICHARSET::encode_string em C++ (Cpp) extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

id_to_unichar(12)

unichar_to_id(6)

load_from_file(6)

get_isupper(4)

size(4)

get_script_from_script_id(4)

get_top_bottom(3)

get_script_table_size(3)

save_to_file(3)

get_isdigit(3)

get_islower(3)

contains_unichar(2)

debug_str(2)

eq(2)

encode_string(2)

GetStrProperties(2)

get_script_id_from_name(1)

PropertiesIncomplete(1)

get_other_case(1)

get_normed_unichar(1)

encodable_string(1)

null_sid(1)

get_isalpha(1)

script_has_upper_lower(1)

SetPropertiesFromOther(1)

step(1)

top_bottom_useful(1)

unichar_insert(1)

Métodos Frequentes

id_to_unichar (12)

unichar_to_id (6)

load_from_file (6)

get_isupper (4)

size (4)

get_script_from_script_id (4)

get_top_bottom (3)

get_script_table_size (3)

save_to_file (3)

get_isdigit (3)

Métodos Frequentes

get_islower (3)

contains_unichar (2)

debug_str (2)

eq (2)

encode_string (2)

GetStrProperties (2)

get_script_id_from_name (1)

PropertiesIncomplete (1)

get_other_case (1)

get_normed_unichar (1)

encodable_string (1)

null_sid (1)

get_isalpha (1)

script_has_upper_lower (1)

SetPropertiesFromOther (1)

step (1)

top_bottom_useful (1)

unichar_insert (1)

Métodos Frequentes

encodable_string (1)

null_sid (1)

get_isalpha (1)

script_has_upper_lower (1)

SetPropertiesFromOther (1)

step (1)

top_bottom_useful (1)

unichar_insert (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: ratngs.cpp · Projeto: 11110101/tess-two

/**
 * WERD_CHOICE::WERD_CHOICE
 *
 * Builds a WERD_CHOICE from a C string by encoding it with the supplied
 * unicharset. The caller must pass a non-NULL src_string.
 *
 * If the string encodes successfully, the choice is initialised from the
 * string and the lengths reported by encode_string. If encoding fails, the
 * choice is initialised with init(8) and then marked bad via make_bad().
 */
WERD_CHOICE::WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset)
    : unicharset_(&unicharset) {
  GenericVector<UNICHAR_ID> unichar_ids;
  GenericVector<char> step_lengths;

  const bool encoded_ok = unicharset.encode_string(
      src_string, true, &unichar_ids, &step_lengths, NULL);
  if (!encoded_ok) {
    // There must have been an invalid unichar in the string.
    this->init(8);
    this->make_bad();
    return;
  }

  // NUL-terminate the lengths so the vector's storage can be read as a
  // C string when constructing the STRING below.
  step_lengths.push_back('\0');
  STRING lengths_string = &step_lengths[0];
  this->init(src_string, lengths_string.string(), 0.0, 0.0, NO_PERM);
}

Exemplo n.º 2

0

Exibir arquivo

Arquivo: ambigs.cpp · Projeto: Kailigithub/tesseract

bool UnicharAmbigs::ParseAmbiguityLine( int line_num, int version, int debug_level, const UNICHARSET &unicharset, char *buffer, int *test_ambig_part_size, UNICHAR_ID *test_unichar_ids, int *replacement_ambig_part_size, char *replacement_string, int *type) { if (version > 1) { // Simpler format is just wrong-string correct-string type\n. STRING input(buffer); GenericVector<STRING> fields; input.split(' ', &fields); if (fields.size() != 3) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } // Encode wrong-string. GenericVector<UNICHAR_ID> unichars; if (!unicharset.encode_string(fields[0].string(), true, &unichars, NULL, NULL)) { return false; } *test_ambig_part_size = unichars.size(); if (*test_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } // Copy encoded string to output. for (int i = 0; i < unichars.size(); ++i) test_unichar_ids[i] = unichars[i]; test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID; // Encode replacement-string to check validity. 
if (!unicharset.encode_string(fields[1].string(), true, &unichars, NULL, NULL)) { return false; } *replacement_ambig_part_size = unichars.size(); if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } if (sscanf(fields[2].string(), "%d", type) != 1) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].string()); return true; } int i; char *token; char *next_token; if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", test_ambig_part_size) || *test_ambig_part_size <= 0) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*test_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } for (i = 0; i < *test_ambig_part_size; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; if (!unicharset.contains_unichar(token)) { if (debug_level) tprintf(kIllegalUnicharMsg, token); break; } test_unichar_ids[i] = unicharset.unichar_to_id(token); } test_unichar_ids[i] = INVALID_UNICHAR_ID; if (i != *test_ambig_part_size || !(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", replacement_ambig_part_size) || *replacement_ambig_part_size <= 0) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) { if (debug_level) tprintf("Too many unichars in ambiguity on line %d\n", line_num); return false; } replacement_string[0] = '\0'; for (i = 0; i < *replacement_ambig_part_size; ++i) { if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token))) break; strcat(replacement_string, token); if (!unicharset.contains_unichar(token)) { if (debug_level) tprintf(kIllegalUnicharMsg, token); break; } } if (i != *replacement_ambig_part_size) { if (debug_level) tprintf(kIllegalMsg, line_num); 
return false; } if (version > 0) { // The next field being true indicates that the abiguity should // always be substituted (e.g. '' should always be changed to "). // For such "certain" n -> m ambigs tesseract will insert character // fragments for the n pieces in the unicharset. AmbigsFound() // will then replace the incorrect ngram with the character // fragments of the correct character (or ngram if m > 1). // Note that if m > 1, an ngram will be inserted into the // modified word, not the individual unigrams. Tesseract // has limited support for ngram unichar (e.g. dawg permuter). if (!(token = strtok_r(NULL, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", type)) { if (debug_level) tprintf(kIllegalMsg, line_num); return false; } } return true; }