示例#1
0
// Loads this unicharset from text supplied one line at a time by fgets_cb.
//
// The first line must hold the number of entries. Each following line
// describes one unichar and may use any of these layouts (newest first):
//   17 fields: unichar props(hex) 10 comma-separated metrics script
//              other_case direction mirror normed
//   16 fields: as above, without normed
//   10 fields: unichar props 4 comma-separated top/bottom values script
//              other_case direction mirror
//    8 fields: unichar props 4 comma-separated top/bottom values script
//              other_case
//    4 fields: unichar props script other_case
//    3 fields: unichar props script
//    2 fields: unichar props
// Fields missing from a shorter layout keep the defaults assigned below.
//
// Parameters:
//   fgets_cb        Callback invoked as Run(buffer, buffer_size); a NULL
//                   return is treated as failure. Presumably it behaves like
//                   fgets on the underlying source - confirm with callers.
//   skip_fragments  If true, entries that parse as character fragments made
//                   of more than one piece are not inserted.
//
// Returns false if the header line or any entry line cannot be read or
// parsed, or if the header holds a negative count; true otherwise. The
// existing contents are cleared before anything is read, so on failure the
// set is left partially loaded.
bool UNICHARSET::load_via_fgets(
    TessResultCallback2<char *, char *, int> *fgets_cb,
    bool skip_fragments) {
  int unicharset_size;
  char buffer[256];

  this->clear();
  // A negative count can only come from a malformed header; reject it rather
  // than handing it to reserve() and reporting success on an empty set.
  if (fgets_cb->Run(buffer, sizeof(buffer)) == NULL ||
      sscanf(buffer, "%d", &unicharset_size) != 1 ||
      unicharset_size < 0) {
    return false;
  }
  this->reserve(unicharset_size);
  for (UNICHAR_ID id = 0; id < unicharset_size; ++id) {
    char unichar[256];
    unsigned int properties;
    char script[64];

    // Defaults used for every field that the matched layout does not supply.
    strcpy(script, null_script);
    int min_bottom = 0;
    int max_bottom = MAX_UINT8;
    int min_top = 0;
    int max_top = MAX_UINT8;
    int min_width = 0;
    int max_width = MAX_INT16;
    int min_bearing = 0;
    int max_bearing = MAX_INT16;
    int min_advance = 0;
    int max_advance = MAX_INT16;
    // TODO(eger): check that this default it ok
    // after enabling BiDi iterator for Arabic+Cube.
    int direction = UNICHARSET::U_LEFT_TO_RIGHT;
    UNICHAR_ID other_case = id;
    UNICHAR_ID mirror = id;
    char normed[64];
    // v records how many fields the layout that finally matched supplied; it
    // decides below which optional values are trusted.
    int v = -1;
    // Try the layouts from longest to shortest and stop at the first one
    // whose field count matches exactly. Every string conversion carries an
    // explicit width (destination size - 1) so that no token can overrun its
    // destination array, independently of the size of buffer.
    if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
        ((v = sscanf(buffer,
                     "%255s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s",
                     unichar, &properties,
                     &min_bottom, &max_bottom, &min_top, &max_top,
                     &min_width, &max_width, &min_bearing, &max_bearing,
                     &min_advance, &max_advance, script, &other_case,
                     &direction, &mirror, normed)) != 17 &&
         (v = sscanf(buffer,
                     "%255s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d",
                     unichar, &properties,
                     &min_bottom, &max_bottom, &min_top, &max_top,
                     &min_width, &max_width, &min_bearing, &max_bearing,
                     &min_advance, &max_advance,
                     script, &other_case, &direction, &mirror)) != 16 &&
          (v = sscanf(buffer, "%255s %x %d,%d,%d,%d %63s %d %d %d",
                      unichar, &properties,
                      &min_bottom, &max_bottom, &min_top, &max_top,
                      script, &other_case, &direction, &mirror)) != 10 &&
          (v = sscanf(buffer, "%255s %x %d,%d,%d,%d %63s %d",
                      unichar, &properties,
                      &min_bottom, &max_bottom, &min_top, &max_top,
                      script, &other_case)) != 8 &&
          (v = sscanf(buffer, "%255s %x %63s %d", unichar, &properties,
                      script, &other_case)) != 4 &&
          (v = sscanf(buffer, "%255s %x %63s",
                      unichar, &properties, script)) != 3 &&
          (v = sscanf(buffer, "%255s %x", unichar, &properties)) != 2)) {
      return false;
    }

    // Skip fragments if needed.
    // NOTE(review): a skipped entry still consumes an id, so every entry
    // after it is configured at its file index rather than at its insertion
    // index. This presumably relies on multi-piece fragments appearing only
    // at the end of the data - confirm.
    CHAR_FRAGMENT *frag = NULL;
    if (skip_fragments && (frag = CHAR_FRAGMENT::parse_from_string(unichar))) {
      int num_pieces = frag->get_total();
      delete frag;
      // Skip multi-element fragments, but keep singles like UNICHAR_BROKEN in.
      if (num_pieces > 1)
        continue;
    }
    // Insert unichar into unicharset and set its properties.
    // The literal token "NULL" is stored as a single space.
    if (strcmp(unichar, "NULL") == 0)
      this->unichar_insert(" ");
    else
      this->unichar_insert(unichar);

    this->set_isalpha(id, properties & ISALPHA_MASK);
    this->set_islower(id, properties & ISLOWER_MASK);
    this->set_isupper(id, properties & ISUPPER_MASK);
    this->set_isdigit(id, properties & ISDIGIT_MASK);
    this->set_ispunctuation(id, properties & ISPUNCTUATION_MASK);
    this->set_isngram(id, false);
    this->set_script(id, script);
    this->unichars[id].properties.enabled = true;
    this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
    this->set_width_range(id, min_width, max_width);
    this->set_bearing_range(id, min_bearing, max_bearing);
    this->set_advance_range(id, min_advance, max_advance);
    this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
    // other_case is supplied by layouts with at least 4 fields, mirror by
    // those with at least 10 and normed only by the 17-field layout; for
    // shorter layouts fall back to the entry itself.
    ASSERT_HOST(other_case < unicharset_size);
    this->set_other_case(id, (v>3) ? other_case : id);
    ASSERT_HOST(mirror < unicharset_size);
    this->set_mirror(id, (v>8) ? mirror : id);
    this->set_normed(id, (v>16) ? normed : unichar);
  }
  post_load_setup();
  return true;
}