コード例 #1
0
void ComplexTextController::setupFontForScriptRun()
{
    FontDataVariant fontDataVariant = AutoVariant;
    // Determine if this script run needs to be converted to small caps.
    // nextScriptRun() will always send us a run of the same case, because a
    // case change while in small-caps mode always results in different
    // FontData, so we only need to check the first character's case.
    if (m_font->isSmallCaps() && u_islower(m_item.string[m_item.item.pos])) {
        m_smallCapsString = String(m_run.data(m_item.item.pos), m_item.item.length);
        m_smallCapsString.makeUpper();
        m_item.string = m_smallCapsString.characters();
        m_item.item.pos = 0;
        fontDataVariant = SmallCapsVariant;
    }
    const FontData* fontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false, fontDataVariant).fontData;
    const FontPlatformData& platformData = fontData->fontDataForCharacter(' ')->platformData();
    m_item.face = platformData.harfbuzzFace();
    void* opaquePlatformData = const_cast<FontPlatformData*>(&platformData);
    m_item.font->userData = opaquePlatformData;

    int size = platformData.size();
    m_item.font->x_ppem = size;
    m_item.font->y_ppem = size;
    // x_ and y_scale are the conversion factors from font design space (fEmSize) to 1/64th of device pixels in 16.16 format.
    const int devicePixelFraction = 64;
    const int multiplyFor16Dot16 = 1 << 16;
    int scale = devicePixelFraction * size * multiplyFor16Dot16 / platformData.emSizeInFontUnits();
    m_item.font->x_scale = scale;
    m_item.font->y_scale = scale;
}
コード例 #2
0
ファイル: icu.c プロジェクト: artbycrunk/calibre
static PyObject* icu_swap_case(PyObject *self, PyObject *input) {
    PyObject *result = NULL;
    UErrorCode status = U_ZERO_ERROR;
    UChar *input_buf = NULL, *output_buf = NULL;
    UChar32 *buf = NULL;
    int32_t sz = 0, sz32 = 0, i = 0;

    input_buf = python_to_icu(input, &sz);
    if (input_buf == NULL) goto end;
    output_buf = (UChar*) calloc(3 * sz, sizeof(UChar));
    buf = (UChar32*) calloc(2 * sz, sizeof(UChar32));
    if (output_buf == NULL || buf == NULL) { PyErr_NoMemory(); goto end; }
    u_strToUTF32(buf, 2 * sz, &sz32, input_buf, sz, &status);

    for (i = 0; i < sz32; i++) {
        if (u_islower(buf[i])) buf[i] = u_toupper(buf[i]);
        else if (u_isupper(buf[i])) buf[i] = u_tolower(buf[i]);
    }
    u_strFromUTF32(output_buf, 3*sz, &sz, buf, sz32, &status);
    if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, u_errorName(status)); goto end; }
    result = icu_to_python(output_buf, sz);

end:
    if (input_buf != NULL) free(input_buf);
    if (output_buf != NULL) free(output_buf);
    if (buf != NULL) free(buf);
    return result;

} // }}}
コード例 #3
0
ファイル: qa_checks.cpp プロジェクト: vslavik/poedit
    bool CheckString(CatalogItemPtr item, const wxString& source, const wxString& translation) override
    {
        if (u_isupper(source[0]) && u_islower(translation[0]))
        {
            item->SetIssue(CatalogItem::Issue::Warning, _("The translation should start as a sentence."));
            return true;
        }

        if (u_islower(source[0]) && u_isupper(translation[0]))
        {
            if (m_lang != "de")
            {
                item->SetIssue(CatalogItem::Issue::Warning, _("The translation should start with a lowercase character."));
                return true;
            }
            // else: German nouns start uppercased, this would cause too many false positives
        }

        return false;
    }
コード例 #4
0
bool HarfBuzzShaper::shapeHarfBuzzRuns(bool shouldSetDirection)
{
    HarfBuzzScopedPtr<hb_buffer_t> harfBuzzBuffer(hb_buffer_create(), hb_buffer_destroy);

    hb_buffer_set_unicode_funcs(harfBuzzBuffer.get(), hb_icu_get_unicode_funcs());

    for (unsigned i = 0; i < m_harfBuzzRuns.size(); ++i) {
        unsigned runIndex = m_run.rtl() ? m_harfBuzzRuns.size() - i - 1 : i;
        HarfBuzzRun* currentRun = m_harfBuzzRuns[runIndex].get();
        const SimpleFontData* currentFontData = currentRun->fontData();
        if (currentFontData->isSVGFont())
            return false;

        hb_buffer_set_script(harfBuzzBuffer.get(), currentRun->script());
        if (shouldSetDirection)
            hb_buffer_set_direction(harfBuzzBuffer.get(), currentRun->rtl() ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
        else
            // Leaving direction to HarfBuzz to guess is *really* bad, but will do for now.
            hb_buffer_guess_segment_properties(harfBuzzBuffer.get());

        // Add a space as pre-context to the buffer. This prevents showing dotted-circle
        // for combining marks at the beginning of runs.
        static const uint16_t preContext = ' ';
        hb_buffer_add_utf16(harfBuzzBuffer.get(), &preContext, 1, 1, 0);

        if (m_font->isSmallCaps() && u_islower(m_normalizedBuffer[currentRun->startIndex()])) {
            String upperText = String(m_normalizedBuffer.get() + currentRun->startIndex(), currentRun->numCharacters());
            upperText.makeUpper();
            currentFontData = m_font->glyphDataForCharacter(upperText[0], false, SmallCapsVariant).fontData;
            hb_buffer_add_utf16(harfBuzzBuffer.get(), upperText.characters(), currentRun->numCharacters(), 0, currentRun->numCharacters());
        } else
            hb_buffer_add_utf16(harfBuzzBuffer.get(), m_normalizedBuffer.get() + currentRun->startIndex(), currentRun->numCharacters(), 0, currentRun->numCharacters());

        FontPlatformData* platformData = const_cast<FontPlatformData*>(&currentFontData->platformData());
        HarfBuzzFace* face = platformData->harfBuzzFace();
        if (!face)
            return false;

        if (m_font->fontDescription().orientation() == Vertical)
            face->setScriptForVerticalGlyphSubstitution(harfBuzzBuffer.get());

        HarfBuzzScopedPtr<hb_font_t> harfBuzzFont(face->createFont(), hb_font_destroy);

        hb_shape(harfBuzzFont.get(), harfBuzzBuffer.get(), m_features.isEmpty() ? 0 : m_features.data(), m_features.size());

        currentRun->applyShapeResult(harfBuzzBuffer.get());
        setGlyphPositionsForHarfBuzzRun(currentRun, harfBuzzBuffer.get());

        hb_buffer_reset(harfBuzzBuffer.get());
    }

    return true;
}
コード例 #5
0
ファイル: locdspnm.cpp プロジェクト: 119120119/node
UnicodeString&
LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
                                                UnicodeString& result) const {
#if !UCONFIG_NO_BREAK_ITERATION
    // check to see whether we need to titlecase result
    if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL &&
          ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) {
        // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE
        Mutex lock(&capitalizationBrkIterLock);
        result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
    }
#endif
    return result;
}
コード例 #6
0
ファイル: props.cpp プロジェクト: 00zhengfu00/third_party
static void
printProps(UChar32 codePoint) {
    char buffer[100];
    UErrorCode errorCode;

    /* get the character name */
    errorCode=U_ZERO_ERROR;
    u_charName(codePoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);

    /* print the code point and the character name */
    printf("U+%04lx\t%s\n", codePoint, buffer);

    /* print some properties */
    printf("  general category (numeric enum value): %u\n", u_charType(codePoint));

    /* note: these APIs do not provide the data from SpecialCasing.txt */
    printf("  is lowercase: %d  uppercase: U+%04lx\n", u_islower(codePoint), u_toupper(codePoint));

    printf("  is digit: %d  decimal digit value: %d\n", u_isdigit(codePoint), u_charDigitValue(codePoint));

    printf("  BiDi directional category (numeric enum value): %u\n", u_charDirection(codePoint));
}
コード例 #7
0
bool HarfBuzzShaper::shapeHarfBuzzRuns()
{
    HarfBuzzScopedPtr<hb_buffer_t> harfbuzzBuffer(hb_buffer_create(), hb_buffer_destroy);

    hb_buffer_set_unicode_funcs(harfbuzzBuffer.get(), hb_icu_get_unicode_funcs());
    if (m_run.rtl() || m_run.directionalOverride())
        hb_buffer_set_direction(harfbuzzBuffer.get(), m_run.rtl() ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);

    for (unsigned i = 0; i < m_harfbuzzRuns.size(); ++i) {
        unsigned runIndex = m_run.rtl() ? m_harfbuzzRuns.size() - i - 1 : i;
        HarfBuzzRun* currentRun = m_harfbuzzRuns[runIndex].get();
        const SimpleFontData* currentFontData = currentRun->fontData();

        if (m_font->isSmallCaps() && u_islower(m_normalizedBuffer[currentRun->startIndex()])) {
            String upperText = String(m_normalizedBuffer.get() + currentRun->startIndex(), currentRun->numCharacters());
            upperText.makeUpper();
            currentFontData = m_font->glyphDataForCharacter(upperText[0], false, SmallCapsVariant).fontData;
            hb_buffer_add_utf16(harfbuzzBuffer.get(), upperText.characters(), currentRun->numCharacters(), 0, currentRun->numCharacters());
        } else
            hb_buffer_add_utf16(harfbuzzBuffer.get(), m_normalizedBuffer.get() + currentRun->startIndex(), currentRun->numCharacters(), 0, currentRun->numCharacters());

        FontPlatformData* platformData = const_cast<FontPlatformData*>(&currentFontData->platformData());
        HarfBuzzNGFace* face = platformData->harfbuzzFace();
        if (!face)
            return false;
        HarfBuzzScopedPtr<hb_font_t> harfbuzzFont(face->createFont(), hb_font_destroy);
        hb_shape(harfbuzzFont.get(), harfbuzzBuffer.get(), m_features.isEmpty() ? 0 : m_features.data(), m_features.size());

        currentRun->applyShapeResult(harfbuzzBuffer.get());
        setGlyphPositionsForHarfBuzzRun(currentRun, harfbuzzBuffer.get());

        hb_buffer_reset(harfbuzzBuffer.get());
        if (m_run.rtl() || m_run.directionalOverride())
            hb_buffer_set_direction(harfbuzzBuffer.get(), m_run.rtl() ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
    }

    return true;
}
コード例 #8
0
ファイル: shared.c プロジェクト: tidatida/parrot
static int
u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
{
    ASSERT_ARGS(u_iscclass)
#if PARROT_HAS_ICU
    UNUSED(interp);
            /* XXX which one
               return u_charDigitValue(codepoint);
               */
    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
    if ((flags & enum_cclass_word)         &&
        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
    if ((flags & enum_cclass_newline)      &&
        (codepoint == 0x2028 || codepoint == 0x2029 ||
         u_hasBinaryProperty(codepoint, UCHAR_LINE_BREAK)))          return 1;

    return 0;
#else
    if (codepoint < 256)
        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;

    if (flags == enum_cclass_any)
        return 1;

    /* All codepoints from u+0100 to u+02af are alphabetic, so we
     * cheat on the WORD and ALPHABETIC properties to include these
     * (and incorrectly exclude all others).  This is a stopgap until
     * ICU is everywhere, or we have better non-ICU unicode support. */
    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
        return (codepoint < 0x2b0);

    if (flags & enum_cclass_whitespace) {
        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
        switch (codepoint) {
          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
          case 0x205f: case 0x3000:
            return 1;
          default:
            break;
        }
    }

    if (flags & enum_cclass_numeric) {
        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
    }

    if (flags & enum_cclass_newline) {
        /* from http://www.unicode.org/Public/UNIDATA/extracted/DerivedLineBreak.txt
         * Line_Break=Mandatory_Break*/
        if (codepoint == 0x2028 || codepoint == 0x2029) return 1;
    }

    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
        Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_LIBRARY_ERROR,
            "no ICU lib loaded");

    return 0;
#endif
}
コード例 #9
0
static jboolean Character_isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
    return u_islower(codePoint);
}
コード例 #10
0
ファイル: java_lang_Character.cpp プロジェクト: webos21/xi
//static jboolean Character_isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
JNIEXPORT jboolean JNICALL
Java_java_lang_Character_isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
    return u_islower(codePoint);
}
コード例 #11
0
ファイル: reldtfmt.cpp プロジェクト: DavidCai1993/node
UnicodeString& RelativeDateFormat::format(  Calendar& cal,
                                UnicodeString& appendTo,
                                FieldPosition& pos) const {

    UErrorCode status = U_ZERO_ERROR;
    UnicodeString relativeDayString;
    UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);

    // calculate the difference, in days, between 'cal' and now.
    int dayDiff = dayDifference(cal, status);

    // look up string
    int32_t len = 0;
    const UChar *theString = getStringForDay(dayDiff, len, status);
    if(U_SUCCESS(status) && (theString!=NULL)) {
        // found a relative string
        relativeDayString.setTo(theString, len);
    }

    if ( relativeDayString.length() > 0 && !fDatePattern.isEmpty() &&
         (fTimePattern.isEmpty() || fCombinedFormat == NULL || fCombinedHasDateAtStart)) {
#if !UCONFIG_NO_BREAK_ITERATION
        // capitalize relativeDayString according to context for relative, set formatter no context
        if ( u_islower(relativeDayString.char32At(0)) && fCapitalizationBrkIter!= NULL &&
             ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
               (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && fCapitalizationOfRelativeUnitsForUIListMenu) ||
               (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && fCapitalizationOfRelativeUnitsForStandAlone) ) ) {
            // titlecase first word of relativeDayString
            relativeDayString.toTitle(fCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
        }
#endif
        fDateTimeFormatter->setContext(UDISPCTX_CAPITALIZATION_NONE, status);
    } else {
        // set our context for the formatter
        fDateTimeFormatter->setContext(capitalizationContext, status);
    }

    if (fDatePattern.isEmpty()) {
        fDateTimeFormatter->applyPattern(fTimePattern);
        fDateTimeFormatter->format(cal,appendTo,pos);
    } else if (fTimePattern.isEmpty() || fCombinedFormat == NULL) {
        if (relativeDayString.length() > 0) {
            appendTo.append(relativeDayString);
        } else {
            fDateTimeFormatter->applyPattern(fDatePattern);
            fDateTimeFormatter->format(cal,appendTo,pos);
        }
    } else {
        UnicodeString datePattern;
        if (relativeDayString.length() > 0) {
            // Need to quote the relativeDayString to make it a legal date pattern
            relativeDayString.findAndReplace(UNICODE_STRING("'", 1), UNICODE_STRING("''", 2)); // double any existing APOSTROPHE
            relativeDayString.insert(0, APOSTROPHE); // add APOSTROPHE at beginning...
            relativeDayString.append(APOSTROPHE); // and at end
            datePattern.setTo(relativeDayString);
        } else {
            datePattern.setTo(fDatePattern);
        }
        UnicodeString combinedPattern;
        fCombinedFormat->format(fTimePattern, datePattern, combinedPattern, status);
        fDateTimeFormatter->applyPattern(combinedPattern);
        fDateTimeFormatter->format(cal,appendTo,pos);
    }

    return appendTo;
}
コード例 #12
0
jboolean fastiva_vm_Character_C$__isLowerCaseImpl(jint codePoint) {
    return u_islower(codePoint);
}
コード例 #13
0
// Helper sets the character attribute properties and sets up the script table.
// Does not set tops and bottoms.
void SetupBasicProperties(bool report_errors, bool decompose,
                          UNICHARSET* unicharset) {
  for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
    // Convert any custom ligatures.
    const char* unichar_str = unicharset->id_to_unichar(unichar_id);
    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
      if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
        unichar_str = UNICHARSET::kCustomLigatures[i][0];
        break;
      }
    }

    // Convert the unichar to UTF32 representation
    std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);

    // Assume that if the property is true for any character in the string,
    // then it holds for the whole "character".
    bool unichar_isalpha = false;
    bool unichar_islower = false;
    bool unichar_isupper = false;
    bool unichar_isdigit = false;
    bool unichar_ispunct = false;

    for (char32 u_ch : uni_vector) {
      if (u_isalpha(u_ch)) unichar_isalpha = true;
      if (u_islower(u_ch)) unichar_islower = true;
      if (u_isupper(u_ch)) unichar_isupper = true;
      if (u_isdigit(u_ch)) unichar_isdigit = true;
      if (u_ispunct(u_ch)) unichar_ispunct = true;
    }

    unicharset->set_isalpha(unichar_id, unichar_isalpha);
    unicharset->set_islower(unichar_id, unichar_islower);
    unicharset->set_isupper(unichar_id, unichar_isupper);
    unicharset->set_isdigit(unichar_id, unichar_isdigit);
    unicharset->set_ispunctuation(unichar_id, unichar_ispunct);

    tesseract::IcuErrorCode err;
    unicharset->set_script(unichar_id, uscript_getName(
        uscript_getScript(uni_vector[0], err)));

    const int num_code_points = uni_vector.size();
    // Obtain the lower/upper case if needed and record it in the properties.
    unicharset->set_other_case(unichar_id, unichar_id);
    if (unichar_islower || unichar_isupper) {
      std::vector<char32> other_case(num_code_points, 0);
      for (int i = 0; i < num_code_points; ++i) {
        // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.
        // However since they deal with UChars (so need a conversion function
        // from char32 or UTF8string) and require a meaningful locale string,
        // for now u_tolower()/u_toupper() are used.
        other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :
          u_tolower(uni_vector[i]);
      }
      std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);
      UNICHAR_ID other_case_id =
          unicharset->unichar_to_id(other_case_uch.c_str());
      if (other_case_id != INVALID_UNICHAR_ID) {
        unicharset->set_other_case(unichar_id, other_case_id);
      } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {
        tprintf("Other case %s of %s is not in unicharset\n",
                other_case_uch.c_str(), unichar_str);
      }
    }

    // Set RTL property and obtain mirror unichar ID from ICU.
    std::vector<char32> mirrors(num_code_points, 0);
    for (int i = 0; i < num_code_points; ++i) {
      mirrors[i] = u_charMirror(uni_vector[i]);
      if (i == 0) {  // set directionality to that of the 1st code point
        unicharset->set_direction(unichar_id,
                                  static_cast<UNICHARSET::Direction>(
                                      u_charDirection(uni_vector[i])));
      }
    }
    std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);
    UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());
    if (mirror_uch_id != INVALID_UNICHAR_ID) {
      unicharset->set_mirror(unichar_id, mirror_uch_id);
    } else if (report_errors) {
      tprintf("Mirror %s of %s is not in unicharset\n",
              mirror_uch.c_str(), unichar_str);
    }

    // Record normalized version of this unichar.
    std::string normed_str;
    if (unichar_id != 0 &&
        tesseract::NormalizeUTF8String(
            decompose ? tesseract::UnicodeNormMode::kNFKD
                      : tesseract::UnicodeNormMode::kNFKC,
            tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone,
            unichar_str, &normed_str) &&
        !normed_str.empty()) {
      unicharset->set_normed(unichar_id, normed_str.c_str());
    } else {
      unicharset->set_normed(unichar_id, unichar_str);
    }
    ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());
  }
  unicharset->post_load_setup();
}
コード例 #14
0
bool HarfBuzzShaper::setupHarfBuzzRun()
{
    m_startIndexOfCurrentRun += m_numCharactersOfCurrentRun;

    // Iterate through the text to take the largest range that stays within
    // a single font.
    int endOfRunIndex = m_normalizedBufferLength - m_startIndexOfCurrentRun;
    SurrogatePairAwareTextIterator iterator(m_normalizedBuffer.get() + m_startIndexOfCurrentRun, 0, endOfRunIndex, endOfRunIndex);
    UChar32 character;
    unsigned clusterLength = 0;
    if (!iterator.consume(character, clusterLength))
        return false;

    m_currentFontData = m_font->glyphDataForCharacter(character, false).fontData;
    UErrorCode errorCode = U_ZERO_ERROR;
    UScriptCode currentScript = uscript_getScript(character, &errorCode);
    if (U_FAILURE(errorCode))
        return false;
    if (currentScript == USCRIPT_INHERITED)
        currentScript = USCRIPT_COMMON;
    for (iterator.advance(clusterLength); iterator.consume(character, clusterLength); iterator.advance(clusterLength)) {
        const SimpleFontData* nextFontData = m_font->glyphDataForCharacter(character, false).fontData;
        if (nextFontData != m_currentFontData)
            break;
        UScriptCode nextScript = uscript_getScript(character, &errorCode);
        if (U_FAILURE(errorCode))
            return false;
        if (currentScript == nextScript || nextScript == USCRIPT_INHERITED || nextScript == USCRIPT_COMMON)
            continue;
        if (currentScript == USCRIPT_COMMON)
            currentScript = nextScript;
        else
            break;
    }
    m_numCharactersOfCurrentRun = iterator.currentCharacter();

    if (!m_harfbuzzBuffer) {
        m_harfbuzzBuffer = hb_buffer_create();
        hb_buffer_set_unicode_funcs(m_harfbuzzBuffer, hb_icu_get_unicode_funcs());
    } else
        hb_buffer_reset(m_harfbuzzBuffer);
    hb_buffer_set_script(m_harfbuzzBuffer, hb_icu_script_to_script(currentScript));

    // WebKit always sets direction to LTR during width calculation.
    // We only set direction when direction is explicitly set to RTL so that
    // preventng wrong width calculation.
    if (m_run.rtl())
        hb_buffer_set_direction(m_harfbuzzBuffer, HB_DIRECTION_RTL);

    // Determine whether this run needs to be converted to small caps.
    // nextScriptRun() will always send us a run of the same case, because a
    // case change while in small-caps mode always results in different
    // FontData, so we only need to check the first character's case.
    if (m_font->isSmallCaps() && u_islower(m_normalizedBuffer[m_startIndexOfCurrentRun])) {
        String upperText = String(m_normalizedBuffer.get() + m_startIndexOfCurrentRun, m_numCharactersOfCurrentRun);
        upperText.makeUpper();
        m_currentFontData = m_font->glyphDataForCharacter(upperText[0], false, SmallCapsVariant).fontData;
        hb_buffer_add_utf16(m_harfbuzzBuffer, upperText.characters(), m_numCharactersOfCurrentRun, 0, m_numCharactersOfCurrentRun);
    } else
        hb_buffer_add_utf16(m_harfbuzzBuffer, m_normalizedBuffer.get() + m_startIndexOfCurrentRun, m_numCharactersOfCurrentRun, 0, m_numCharactersOfCurrentRun);

    return true;
}