bool UNICHARSET::load_via_fgets( TessResultCallback2<char *, char *, int> *fgets_cb, bool skip_fragments) { int unicharset_size; char buffer[256]; this->clear(); if (fgets_cb->Run(buffer, sizeof(buffer)) == NULL || sscanf(buffer, "%d", &unicharset_size) != 1) { return false; } this->reserve(unicharset_size); for (UNICHAR_ID id = 0; id < unicharset_size; ++id) { char unichar[256]; unsigned int properties; char script[64]; strcpy(script, null_script); int min_bottom = 0; int max_bottom = MAX_UINT8; int min_top = 0; int max_top = MAX_UINT8; int min_width = 0; int max_width = MAX_INT16; int min_bearing = 0; int max_bearing = MAX_INT16; int min_advance = 0; int max_advance = MAX_INT16; // TODO(eger): check that this default it ok // after enabling BiDi iterator for Arabic+Cube. int direction = UNICHARSET::U_LEFT_TO_RIGHT; UNICHAR_ID other_case = id; UNICHAR_ID mirror = id; char normed[64]; int v = -1; if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL || ((v = sscanf(buffer, "%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s", unichar, &properties, &min_bottom, &max_bottom, &min_top, &max_top, &min_width, &max_width, &min_bearing, &max_bearing, &min_advance, &max_advance, script, &other_case, &direction, &mirror, normed)) != 17 && (v = sscanf(buffer, "%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d", unichar, &properties, &min_bottom, &max_bottom, &min_top, &max_top, &min_width, &max_width, &min_bearing, &max_bearing, &min_advance, &max_advance, script, &other_case, &direction, &mirror)) != 16 && (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d", unichar, &properties, &min_bottom, &max_bottom, &min_top, &max_top, script, &other_case, &direction, &mirror)) != 10 && (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties, &min_bottom, &max_bottom, &min_top, &max_top, script, &other_case)) != 8 && (v = sscanf(buffer, "%s %x %63s %d", unichar, &properties, script, &other_case)) != 4 && (v = sscanf(buffer, "%s %x %63s", unichar, &properties, script)) != 3 && (v = sscanf(buffer, "%s %x", unichar, &properties)) != 2)) { return false; } // Skip fragments if needed. CHAR_FRAGMENT *frag = NULL; if (skip_fragments && (frag = CHAR_FRAGMENT::parse_from_string(unichar))) { int num_pieces = frag->get_total(); delete frag; // Skip multi-element fragments, but keep singles like UNICHAR_BROKEN in. if (num_pieces > 1) continue; } // Insert unichar into unicharset and set its properties. if (strcmp(unichar, "NULL") == 0) this->unichar_insert(" "); else this->unichar_insert(unichar); this->set_isalpha(id, properties & ISALPHA_MASK); this->set_islower(id, properties & ISLOWER_MASK); this->set_isupper(id, properties & ISUPPER_MASK); this->set_isdigit(id, properties & ISDIGIT_MASK); this->set_ispunctuation(id, properties & ISPUNCTUATION_MASK); this->set_isngram(id, false); this->set_script(id, script); this->unichars[id].properties.enabled = true; this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top); this->set_width_range(id, min_width, max_width); this->set_bearing_range(id, min_bearing, max_bearing); this->set_advance_range(id, min_advance, max_advance); this->set_direction(id, static_cast<UNICHARSET::Direction>(direction)); ASSERT_HOST(other_case < unicharset_size); this->set_other_case(id, (v>3) ? other_case : id); ASSERT_HOST(mirror < unicharset_size); this->set_mirror(id, (v>8) ? mirror : id); this->set_normed(id, (v>16) ? normed : unichar); } post_load_setup(); return true; }