void UNICHARSET::unichar_insert(const char* const unichar_repr) { if (!ids.contains(unichar_repr)) { if (strlen(unichar_repr) > UNICHAR_LEN) { fprintf(stderr, "Utf8 buffer too big, size=%d for %s\n", int(strlen(unichar_repr)), unichar_repr); return; } if (size_used == size_reserved) { if (size_used == 0) reserve(8); else reserve(2 * size_used); } strcpy(unichars[size_used].representation, unichar_repr); this->set_script(size_used, null_script); // If the given unichar_repr represents a fragmented character, set // fragment property to a pointer to CHAR_FRAGMENT class instance with // information parsed from the unichar representation. Use the script // of the base unichar for the fragmented character if possible. CHAR_FRAGMENT *frag = CHAR_FRAGMENT::parse_from_string(unichar_repr); this->unichars[size_used].properties.fragment = frag; if (frag != NULL && this->contains_unichar(frag->get_unichar())) { this->unichars[size_used].properties.script_id = this->get_script(frag->get_unichar()); } this->unichars[size_used].properties.enabled = true; ids.insert(unichar_repr, size_used); ++size_used; } }
void UNICHARSET::unichar_insert(const char* const unichar_repr, OldUncleanUnichars old_style) { if (old_style == OldUncleanUnichars::kTrue) old_style_included_ = true; std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr); if (!cleaned.empty() && !ids.contains(cleaned.data(), cleaned.size())) { const char* str = cleaned.c_str(); GenericVector<int> encoding; if (!old_style_included_ && encode_string(str, true, &encoding, nullptr, nullptr)) return; if (size_used == size_reserved) { if (size_used == 0) reserve(8); else reserve(2 * size_used); } int index = 0; do { if (index >= UNICHAR_LEN) { fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN, unichar_repr); return; } unichars[size_used].representation[index++] = *str++; } while (*str != '\0'); unichars[size_used].representation[index] = '\0'; this->set_script(size_used, null_script); // If the given unichar_repr represents a fragmented character, set // fragment property to a pointer to CHAR_FRAGMENT class instance with // information parsed from the unichar representation. Use the script // of the base unichar for the fragmented character if possible. CHAR_FRAGMENT* frag = CHAR_FRAGMENT::parse_from_string(unichars[size_used].representation); this->unichars[size_used].properties.fragment = frag; if (frag != nullptr && this->contains_unichar(frag->get_unichar())) { this->unichars[size_used].properties.script_id = this->get_script(frag->get_unichar()); } this->unichars[size_used].properties.enabled = true; ids.insert(unichars[size_used].representation, size_used); ++size_used; } }