示例#1
0
void UNICHARSET::unichar_insert(const char* const unichar_repr) {
  if (!ids.contains(unichar_repr)) {
    if (strlen(unichar_repr) > UNICHAR_LEN) {
      fprintf(stderr, "Utf8 buffer too big, size=%d for %s\n",
              int(strlen(unichar_repr)), unichar_repr);
      return;
    }
    if (size_used == size_reserved) {
      if (size_used == 0)
        reserve(8);
      else
        reserve(2 * size_used);
    }

    strcpy(unichars[size_used].representation, unichar_repr);
    this->set_script(size_used, null_script);
    // If the given unichar_repr represents a fragmented character, set
    // fragment property to a pointer to CHAR_FRAGMENT class instance with
    // information parsed from the unichar representation. Use the script
    // of the base unichar for the fragmented character if possible.
    CHAR_FRAGMENT *frag = CHAR_FRAGMENT::parse_from_string(unichar_repr);
    this->unichars[size_used].properties.fragment = frag;
    if (frag != NULL && this->contains_unichar(frag->get_unichar())) {
      this->unichars[size_used].properties.script_id =
        this->get_script(frag->get_unichar());
    }
    this->unichars[size_used].properties.enabled = true;
    ids.insert(unichar_repr, size_used);
    ++size_used;
  }
}
示例#2
0
void UNICHARSET::unichar_insert(const char* const unichar_repr,
                                OldUncleanUnichars old_style) {
  if (old_style == OldUncleanUnichars::kTrue) old_style_included_ = true;
  std::string cleaned =
      old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
  if (!cleaned.empty() && !ids.contains(cleaned.data(), cleaned.size())) {
    const char* str = cleaned.c_str();
    GenericVector<int> encoding;
    if (!old_style_included_ &&
        encode_string(str, true, &encoding, nullptr, nullptr))
      return;
    if (size_used == size_reserved) {
      if (size_used == 0)
        reserve(8);
      else
        reserve(2 * size_used);
    }
    int index = 0;
    do {
      if (index >= UNICHAR_LEN) {
        fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN,
                unichar_repr);
        return;
      }
      unichars[size_used].representation[index++] = *str++;
    } while (*str != '\0');
    unichars[size_used].representation[index] = '\0';
    this->set_script(size_used, null_script);
    // If the given unichar_repr represents a fragmented character, set
    // fragment property to a pointer to CHAR_FRAGMENT class instance with
    // information parsed from the unichar representation. Use the script
    // of the base unichar for the fragmented character if possible.
    CHAR_FRAGMENT* frag =
        CHAR_FRAGMENT::parse_from_string(unichars[size_used].representation);
    this->unichars[size_used].properties.fragment = frag;
    if (frag != nullptr && this->contains_unichar(frag->get_unichar())) {
      this->unichars[size_used].properties.script_id =
        this->get_script(frag->get_unichar());
    }
    this->unichars[size_used].properties.enabled = true;
    ids.insert(unichars[size_used].representation, size_used);
    ++size_used;
  }
}