コード例 #1
0
ファイル: pango_font_info.cpp プロジェクト: 11110101/tess-two
// Removes from *utf8_text, in place, every character that is neither
// whitespace nor zero-width and for which the font's coverage is not
// PANGO_COVERAGE_EXACT. Illegal UTF-8 sequences are removed as well but are
// not counted.
//
// utf8_text: UTF-8 encoded string to filter; it is shrunk to the kept bytes.
// Returns the number of characters dropped for lack of font coverage.
int PangoFontInfo::DropUncoveredChars(string* utf8_text) const {
  PangoFont* font = ToPangoFont();
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  int num_dropped_chars = 0;
  // Maintain a read iterator and a write pointer into the same buffer. Each
  // covered UTF-8 character is copied down from the read position to the write
  // position, and at the end the string is resized to the bytes written.
  char* out = const_cast<char*>(utf8_text->c_str());
  const UNICHAR::const_iterator it_begin =
      UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
  const UNICHAR::const_iterator it_end =
      UNICHAR::end(utf8_text->c_str(), utf8_text->length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
    // Skip bad utf-8.
    if (!it.is_legal()) {
      ++it;  // One suitable error message will still be issued.
      continue;
    }
    // Capture everything needed from the current character up front: once
    // bytes are copied down, the buffer under the iterator may have changed.
    const int unicode = *it;
    const int utf8_len = it.utf8_len();
    const char* utf8_char = it.utf8_data();
    const bool keep = IsWhitespace(unicode) || pango_is_zero_width(unicode) ||
        pango_coverage_get(coverage, unicode) == PANGO_COVERAGE_EXACT;
    if (!keep && TLOG_IS_ON(2)) {
      char tmp[5];
      int len = it.get_utf8(tmp);
      tmp[len] = '\0';
      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, unicode);
    }
    // Advance before writing so the iterator never decodes bytes that the
    // copy below has already overwritten.
    ++it;
    if (!keep) {
      ++num_dropped_chars;
      continue;
    }
    // Source and destination live in the same buffer and may overlap (or be
    // identical while nothing has been dropped yet), so memmove is required.
    memmove(out, utf8_char, utf8_len);
    out += utf8_len;
  }
  // pango_font_get_coverage() hands back a reference the caller must release.
  if (coverage != NULL) pango_coverage_unref(coverage);
  // NOTE(review): assumes ToPangoFont() returns a reference owned by the
  // caller (as pango_font_map_load_font() does) - confirm against its
  // definition before relying on this unref.
  if (font != NULL) g_object_unref(font);
  utf8_text->resize(out - utf8_text->c_str());
  return num_dropped_chars;
}
コード例 #2
0
ファイル: normstrngs.cpp プロジェクト: jan-ruzicka/tesseract
unsigned int SpanUTF8NotWhitespace(const char* text) {
  int n_notwhite = 0;
  for (UNICHAR::const_iterator it = UNICHAR::begin(text, strlen(text));
       it != UNICHAR::end(text, strlen(text)); ++it) {
    if (IsWhitespace(*it)) break;
    n_notwhite += it.utf8_len();
  }
  return n_notwhite;
}