// Returns a copy of |str| in which each UTF-8 character that is equal to the
// second column of an entry in UNICHARSET::kCustomLigatures is replaced by
// that entry's first column. Characters with no matching entry are copied
// through unchanged. Only the first matching entry is used.
string LigatureTable::RemoveCustomLigatures(const string& str) const {
  string result;
  UNICHAR::const_iterator ch = UNICHAR::begin(str.c_str(), str.length());
  const UNICHAR::const_iterator str_end =
      UNICHAR::end(str.c_str(), str.length());
  while (ch != str_end) {
    // Extract the current character as a NUL-terminated C string.
    char utf8_buf[5];
    const int num_bytes = ch.get_utf8(utf8_buf);
    utf8_buf[num_bytes] = '\0';

    // Default to the character itself; switch to the table's first column if
    // the character matches a second-column entry. The table is terminated by
    // a row whose first column is null.
    const char* replacement = utf8_buf;
    for (int row = 0; UNICHARSET::kCustomLigatures[row][0] != nullptr; ++row) {
      if (strcmp(utf8_buf, UNICHARSET::kCustomLigatures[row][1]) == 0) {
        replacement = UNICHARSET::kCustomLigatures[row][0];
        break;
      }
    }
    result += replacement;
    ++ch;
  }
  return result;
}
unsigned int SpanUTF8NotWhitespace(const char* text) { int n_notwhite = 0; for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text)); it != UNICHAR::end(text, strlen(text)); ++it) { if (IsWhitespace(*it)) break; n_notwhite += it.utf8_len(); } return n_notwhite; }
// Returns a copy of |str| in which each UTF-8 character that is a key of
// lig_to_norm_table_ is replaced by the value mapped to it. Characters that
// are not keys of the table are copied through unchanged.
string LigatureTable::RemoveLigatures(const string& str) const {
  string result;
  UNICHAR::const_iterator ch = UNICHAR::begin(str.c_str(), str.length());
  const UNICHAR::const_iterator str_end =
      UNICHAR::end(str.c_str(), str.length());
  for (; ch != str_end; ++ch) {
    // Extract the current character as a NUL-terminated C string so it can be
    // used as a lookup key.
    char utf8_buf[5];
    const int num_bytes = ch.get_utf8(utf8_buf);
    utf8_buf[num_bytes] = '\0';

    const LigHash::const_iterator entry = lig_to_norm_table_.find(utf8_buf);
    if (entry == lig_to_norm_table_.end()) {
      // Not in the table: keep the character as is.
      result += utf8_buf;
      continue;
    }
    result += entry->second;
  }
  return result;
}
bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const { PangoFont* font = ToPangoFont(); PangoCoverage* coverage = pango_font_get_coverage(font, NULL); for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length); it != UNICHAR::end(utf8_text, byte_length); ++it) { if (IsWhitespace(*it) || pango_is_zero_width(*it)) continue; if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) { char tmp[5]; int len = it.get_utf8(tmp); tmp[len] = '\0'; tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it); return false; } } return true; }
// Removes from |*utf8_text|, in place, every character that is neither
// whitespace nor zero-width and that the font's coverage does not report as
// PANGO_COVERAGE_EXACT. Characters the iterator reports as illegal UTF-8 are
// also removed, but are not counted. Returns the number of uncovered
// characters that were dropped.
int PangoFontInfo::DropUncoveredChars(string* utf8_text) const {
  // NOTE(review): ToPangoFont() is not visible here; if it returns a new
  // reference, |font| must also be released before returning -- confirm.
  PangoFont* font = ToPangoFont();
  // The coverage object returned by Pango is owned by the caller; it is
  // released after the loop.
  PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
  int num_dropped_chars = 0;
  // Maintain two positions in the string. For space efficiency, we repeatedly
  // copy one covered UTF8 character from the read position to the write
  // position, and at the end resize the string to the right length.
  // The write pointer comes from the mutable accessor, because the buffer
  // returned by c_str() must not be modified.
  char* out = &(*utf8_text)[0];
  const UNICHAR::const_iterator it_begin =
      UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
  const UNICHAR::const_iterator it_end =
      UNICHAR::end(utf8_text->c_str(), utf8_text->length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
    // Skip bad utf-8.
    if (!it.is_legal()) {
      ++it;  // One suitable error message will still be issued.
      continue;
    }
    const auto unicode = *it;  // Code point, as printed in the log below.
    const int char_len = it.utf8_len();
    const char* char_data = it.utf8_data();
    const bool covered =
        IsWhitespace(unicode) || pango_is_zero_width(unicode) ||
        pango_coverage_get(coverage, unicode) == PANGO_COVERAGE_EXACT;
    if (!covered && TLOG_IS_ON(2)) {
      char tmp[5];
      int len = it.get_utf8(tmp);
      tmp[len] = '\0';
      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, unicode);
    }
    // Advance before the buffer is modified: once characters have been
    // dropped, the copy below can overwrite bytes at the iterator's current
    // position, and the iterator presumably reads those bytes to find its
    // step.
    ++it;
    if (!covered) {
      ++num_dropped_chars;
      continue;
    }
    // Source and destination lie in the same buffer and may overlap or
    // coincide, so use memmove rather than strncpy.
    memmove(out, char_data, char_len);
    out += char_len;
  }
  if (coverage != nullptr) pango_coverage_unref(coverage);
  utf8_text->resize(out - utf8_text->c_str());
  return num_dropped_chars;
}