bool SmallCapsIterator::consume(unsigned *capsLimit, SmallCapsBehavior* smallCapsBehavior)
{
    if (m_atEnd)
        return false;

    while (m_utf16Iterator->consume(m_nextUChar32)) {
        m_previousSmallCapsBehavior = m_currentSmallCapsBehavior;
        // Skipping over combining marks, as these combine with the small-caps
        // uppercased text as well and we do not need to split by their
        // individual case-ness.
        if (!u_getCombiningClass(m_nextUChar32)) {
            m_currentSmallCapsBehavior = u_hasBinaryProperty(m_nextUChar32, UCHAR_CHANGES_WHEN_UPPERCASED)
                ? SmallCapsUppercaseNeeded
                : SmallCapsSameCase;
        }

        if (m_previousSmallCapsBehavior != m_currentSmallCapsBehavior
            && m_previousSmallCapsBehavior != SmallCapsInvalid) {
            *capsLimit = m_utf16Iterator->offset();
            *smallCapsBehavior = m_previousSmallCapsBehavior;
            return true;
        }
        m_utf16Iterator->advance();
    }
    *capsLimit = m_bufferSize;
    *smallCapsBehavior = m_currentSmallCapsBehavior;
    m_atEnd = true;
    return true;
}
Ejemplo n.º 2
0
int32_t SpoofImpl::findHiddenOverlay(const UnicodeString& input, UErrorCode&) const {
    bool sawLeadCharacter = false;
    for (int32_t i=0; i<input.length();) {
        UChar32 cp = input.char32At(i);
        if (sawLeadCharacter && cp == 0x0307) {
            return i;
        }
        uint8_t combiningClass = u_getCombiningClass(cp);
        // Skip over characters except for those with combining class 0 (non-combining characters) or with
        // combining class 230 (same class as U+0307)
        U_ASSERT(u_getCombiningClass(0x0307) == 230);
        if (combiningClass == 0 || combiningClass == 230) {
            sawLeadCharacter = isIllegalCombiningDotLeadCharacter(cp);
        }
        i += U16_LENGTH(cp);
    }
    return -1;
}
Ejemplo n.º 3
0
void SimpleFontInstance::getGlyphAdvance(LEGlyphID glyph, LEPoint &advance) const
{
#if 0
    if (u_getCombiningClass((UChar32) glyph) == 0) {
        advance.fX = xUnitsToPoints(2048);
    } else {
        advance.fX = 0;
    }
#else
    advance.fX = xUnitsToPoints(2048);
#endif

    advance.fY = 0;
}
Ejemplo n.º 4
0
void SimpleFontInstance::getGlyphAdvance(LEGlyphID glyph, LEPoint &advance) const
{
#if 0
    if (u_getCombiningClass((UChar32) glyph) == 0) {
        advance.fX = xUnitsToPoints(2048);
    } else {
        advance.fX = 0;
    }
#else
    (void)glyph;  // Suppress unused parameter compiler warning.
    advance.fX = xUnitsToPoints(2048);
#endif

    advance.fY = 0;
}
Ejemplo n.º 5
0
UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
{
    // According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
    static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;

    if (m_currentCharacter + 1 >= m_endCharacter)
        return 0;

    if (u_getCombiningClass(m_characters[1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
        // Normalize into composed form using 3.2 rules.
        UChar normalizedCharacters[2] = { 0, 0 };
        UErrorCode uStatus = U_ZERO_ERROR;  
        int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
        if (resultLength == 1 && !uStatus)
            return normalizedCharacters[0];
    }

    return 0;
}
Ejemplo n.º 6
0
U_CAPI int32_t  U_EXPORT2
uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
    int32_t i = 0, j = 0;
    if(U_FAILURE(*status) || table->size == 0) {
        return 0;
    }

    table->position = 0;

    if(table->offsets != NULL) {
        uprv_free(table->offsets);
    }
    table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
    if(table->offsets == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }


    /* See how much memory we need */
    for(i = 0; i<table->size; i++) {
        table->offsets[i] = table->position+mainOffset;
        table->position += table->elements[i]->position;
    }

    /* Allocate it */
    if(table->CEs != NULL) {
        uprv_free(table->CEs);
    }
    table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
    if(table->CEs == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        return 0;
    }
    uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));

    if(table->codePoints != NULL) {
        uprv_free(table->codePoints);
    }
    table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
    if(table->codePoints == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        uprv_free(table->CEs);
        table->CEs = NULL;
        return 0;
    }
    uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));

    /* Now stuff the things in*/

    UChar *cpPointer = table->codePoints;
    uint32_t *CEPointer = table->CEs;
    for(i = 0; i<table->size; i++) {
        int32_t size = table->elements[i]->position;
        uint8_t ccMax = 0, ccMin = 255, cc = 0;
        for(j = 1; j<size; j++) {
            cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
            if(cc>ccMax) {
                ccMax = cc;
            }
            if(cc<ccMin) {
                ccMin = cc;
            }
            *(cpPointer+j) = table->elements[i]->codePoints[j];
        }
        *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;

        uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
        for(j = 0; j<size; j++) {
            if(isCntTableElement(*(CEPointer+j))) {
                *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
            }
        }
        cpPointer += size;
        CEPointer += size;
    }

    // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the 
    // start of the flat file). However, what is done below is just wrong and it affects building of 
    // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
    // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
    // feasible.
    uint32_t CE;
    for(i = 0; i<=0x10FFFF; i++) {
        /*CE = ucmpe32_get(table->mapping, i);*/
        CE = utrie_get32(table->mapping, i, NULL);
        if(isCntTableElement(CE)) {
            CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
            /*ucmpe32_set(table->mapping, i, CE);*/
            utrie_set32(table->mapping, i, CE);
        }
    }


    return table->position;
}
Ejemplo n.º 7
0
/**
 * Dumb recursive implementation of permutation.
 * TODO: optimize
 * @param source the string to find permutations for
 * @return the results in a set.
 */
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
    if(U_FAILURE(status)) {
        return;
    }
    //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
    int32_t i = 0;

    // optimization:
    // if zero or one character, just return a set with it
    // we check for length < 2 to keep from counting code points all the time
    if (source.length() <= 2 && source.countChar32() <= 1) {
        UnicodeString *toPut = new UnicodeString(source);
        /* test for NULL */
        if (toPut == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        result->put(source, toPut, status);
        return;
    }

    // otherwise iterate through the string, and recursively permute all the other characters
    UChar32 cp;
    Hashtable subpermute(status);
    if(U_FAILURE(status)) {
        return;
    }
    subpermute.setValueDeleter(uprv_deleteUObject);

    for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);
        const UHashElement *ne = NULL;
        int32_t el = UHASH_FIRST;
        UnicodeString subPermuteString = source;

        // optimization:
        // if the character is canonical combining class zero,
        // don't permute it
        if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
            //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
            continue;
        }

        subpermute.removeAll();

        // see what the permutations of the characters before and after this one are
        //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
        permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
        /* Test for buffer overflows */
        if(U_FAILURE(status)) {
            return;
        }
        // The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents 
        // of source at this point.

        // prefix this character to all of them
        ne = subpermute.nextElement(el);
        while (ne != NULL) {
            UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
            UnicodeString *chStr = new UnicodeString(cp);
            //test for  NULL
            if (chStr == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
            //if (PROGRESS) printf("  Piece: %s\n", UToS(*chStr));
            result->put(*chStr, chStr, status);
            ne = subpermute.nextElement(el);
        }
    }
    //return result;
}
Ejemplo n.º 8
0
static unsigned int hb_icu_get_combining_class (hb_codepoint_t unicode) { return u_getCombiningClass (unicode); }
Ejemplo n.º 9
0
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
    UErrorCode errorCode;

    if(which<UCHAR_BINARY_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_BINARY_LIMIT) {
        return (int32_t)u_hasBinaryProperty(c, which);
    } else if(which<UCHAR_INT_START) {
        return 0; /* undefined */
    } else if(which<UCHAR_INT_LIMIT) {
        switch(which) {
        case UCHAR_BIDI_CLASS:
            return (int32_t)u_charDirection(c);
        case UCHAR_BLOCK:
            return (int32_t)ublock_getCode(c);
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_CANONICAL_COMBINING_CLASS:
            return u_getCombiningClass(c);
#endif
        case UCHAR_DECOMPOSITION_TYPE:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
        case UCHAR_EAST_ASIAN_WIDTH:
            return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
        case UCHAR_GENERAL_CATEGORY:
            return (int32_t)u_charType(c);
        case UCHAR_JOINING_GROUP:
            return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
        case UCHAR_JOINING_TYPE:
            return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
        case UCHAR_LINE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
        case UCHAR_NUMERIC_TYPE: {
            int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1));
            return UPROPS_NTV_GET_TYPE(ntv);
        }
        case UCHAR_SCRIPT:
            errorCode=U_ZERO_ERROR;
            return (int32_t)uscript_getScript(c, &errorCode);
        case UCHAR_HANGUL_SYLLABLE_TYPE: {
            /* see comments on gcbToHst[] above */
            int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
            if(gcb<LENGTHOF(gcbToHst)) {
                return gcbToHst[gcb];
            } else {
                return U_HST_NOT_APPLICABLE;
            }
        }
#if !UCONFIG_NO_NORMALIZATION
        case UCHAR_NFD_QUICK_CHECK:
        case UCHAR_NFKD_QUICK_CHECK:
        case UCHAR_NFC_QUICK_CHECK:
        case UCHAR_NFKC_QUICK_CHECK:
            return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
        case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
            return getFCD16(c)>>8;
        case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
            return getFCD16(c)&0xff;
#endif
        case UCHAR_GRAPHEME_CLUSTER_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
        case UCHAR_SENTENCE_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
        case UCHAR_WORD_BREAK:
            return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
        default:
            return 0; /* undefined */
        }
    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
Ejemplo n.º 10
0
uint8_t __hs_u_getCombiningClass(UChar32 c)
{
    return u_getCombiningClass(c);
}