bool SmallCapsIterator::consume(unsigned *capsLimit, SmallCapsBehavior* smallCapsBehavior) { if (m_atEnd) return false; while (m_utf16Iterator->consume(m_nextUChar32)) { m_previousSmallCapsBehavior = m_currentSmallCapsBehavior; // Skipping over combining marks, as these combine with the small-caps // uppercased text as well and we do not need to split by their // individual case-ness. if (!u_getCombiningClass(m_nextUChar32)) { m_currentSmallCapsBehavior = u_hasBinaryProperty(m_nextUChar32, UCHAR_CHANGES_WHEN_UPPERCASED) ? SmallCapsUppercaseNeeded : SmallCapsSameCase; } if (m_previousSmallCapsBehavior != m_currentSmallCapsBehavior && m_previousSmallCapsBehavior != SmallCapsInvalid) { *capsLimit = m_utf16Iterator->offset(); *smallCapsBehavior = m_previousSmallCapsBehavior; return true; } m_utf16Iterator->advance(); } *capsLimit = m_bufferSize; *smallCapsBehavior = m_currentSmallCapsBehavior; m_atEnd = true; return true; }
int32_t SpoofImpl::findHiddenOverlay(const UnicodeString& input, UErrorCode&) const { bool sawLeadCharacter = false; for (int32_t i=0; i<input.length();) { UChar32 cp = input.char32At(i); if (sawLeadCharacter && cp == 0x0307) { return i; } uint8_t combiningClass = u_getCombiningClass(cp); // Skip over characters except for those with combining class 0 (non-combining characters) or with // combining class 230 (same class as U+0307) U_ASSERT(u_getCombiningClass(0x0307) == 230); if (combiningClass == 0 || combiningClass == 230) { sawLeadCharacter = isIllegalCombiningDotLeadCharacter(cp); } i += U16_LENGTH(cp); } return -1; }
/**
 * Reports the advance for `glyph`.
 *
 * In the active configuration every glyph gets the same horizontal advance,
 * xUnitsToPoints(2048), and a vertical advance of 0. The disabled (#if 0)
 * variant would instead give a zero horizontal advance to glyphs whose ID,
 * read as a code point, has a non-zero combining class.
 *
 * @param glyph   the glyph ID; unused while the combining-class variant is
 *                disabled.
 * @param advance receives the advance in fX / fY.
 */
void SimpleFontInstance::getGlyphAdvance(LEGlyphID glyph, LEPoint &advance) const
{
#if 0
    if (u_getCombiningClass((UChar32) glyph) == 0) {
        advance.fX = xUnitsToPoints(2048);
    } else {
        advance.fX = 0;
    }
#else
    (void)glyph;   // Suppress unused parameter compiler warning.
    advance.fX = xUnitsToPoints(2048);
#endif

    advance.fY = 0;
}
/**
 * Fills `advance` with the advance of `glyph`.
 *
 * As compiled, the horizontal advance is the constant xUnitsToPoints(2048)
 * for every glyph and the vertical advance is 0. The preprocessor-disabled
 * alternative zeroes the horizontal advance for glyph IDs whose combining
 * class (as a code point) is non-zero.
 *
 * @param glyph   glyph ID; ignored in the active branch.
 * @param advance output advance (fX, fY).
 */
void SimpleFontInstance::getGlyphAdvance(LEGlyphID glyph, LEPoint &advance) const
{
    // The vertical advance does not depend on the glyph.
    advance.fY = 0;

#if 0
    advance.fX = (u_getCombiningClass((UChar32) glyph) == 0) ? xUnitsToPoints(2048) : 0;
#else
    (void)glyph;   // Suppress unused parameter compiler warning.
    advance.fX = xUnitsToPoints(2048);
#endif
}
// Tries to compose the first two code units of m_characters into a single
// code unit when the second one has the combining class of the
// hiragana/katakana voicing marks.
//
// Returns the composed code unit, or 0 when there is no following character,
// the following character is not a voicing mark, or NFC normalization does
// not yield exactly one code unit without a status code.
UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
{
    // According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
    static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;

    const bool hasFollowingCharacter = m_currentCharacter + 1 < m_endCharacter;
    if (!hasFollowingCharacter)
        return 0;

    if (u_getCombiningClass(m_characters[1]) != hiraganaKatakanaVoicingMarksCombiningClass)
        return 0;

    // Normalize into composed form using 3.2 rules.
    UChar composed[2] = { 0, 0 };
    UErrorCode errorCode = U_ZERO_ERROR;
    const int32_t composedLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &composed[0], 2, &errorCode);

    if (composedLength != 1 || errorCode)
        return 0;

    return composed[0];
}
/**
 * Flattens the per-element contraction data of `table` into the contiguous
 * arrays table->codePoints and table->CEs, and records in table->offsets
 * where each element starts.
 *
 * Steps:
 *  1. (Re)allocate table->offsets and compute, for every element, its start
 *     position plus `mainOffset`; table->position accumulates the total size.
 *  2. (Re)allocate table->CEs and table->codePoints with table->position
 *     entries each.
 *  3. Copy each element's code points and CEs into the flat arrays. Slot 0 of
 *     each element's code point run is a header: the low 8 bits hold the
 *     maximum combining class of the element's code points (index >= 1), and
 *     bit 8 is set when all of them share one combining class.
 *     CEs that are contraction-table references are rewritten to carry the
 *     flattened offset.
 *  4. Walk every code point in table->mapping and rewrite contraction CEs in
 *     the same way.
 *
 * @param table      the contraction table to flatten; its offsets, CEs and
 *                   codePoints buffers are freed and replaced.
 * @param mainOffset value added to every element offset.
 * @param status     ICU error code; set to U_MEMORY_ALLOCATION_ERROR when an
 *                   allocation fails.
 * @return table->position (the flattened size), or 0 when `status` already
 *         indicates failure, the table is empty, or an allocation fails.
 */
U_CAPI int32_t U_EXPORT2
uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
    int32_t i = 0, j = 0;
    if(U_FAILURE(*status) || table->size == 0) {
        return 0;
    }

    table->position = 0;

    if(table->offsets != NULL) {
        uprv_free(table->offsets);
    }
    table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
    if(table->offsets == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    /* See how much memory we need */
    for(i = 0; i<table->size; i++) {
        table->offsets[i] = table->position+mainOffset;
        table->position += table->elements[i]->position;
    }

    /* Allocate it */
    if(table->CEs != NULL) {
        uprv_free(table->CEs);
    }
    table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
    if(table->CEs == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        return 0;
    }
    uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));

    if(table->codePoints != NULL) {
        uprv_free(table->codePoints);
    }
    table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
    if(table->codePoints == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(table->offsets);
        table->offsets = NULL;
        uprv_free(table->CEs);
        table->CEs = NULL;
        return 0;
    }
    uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));

    /* Now stuff the things in*/

    UChar *cpPointer = table->codePoints;
    uint32_t *CEPointer = table->CEs;
    for(i = 0; i<table->size; i++) {
        int32_t size = table->elements[i]->position;
        uint8_t ccMax = 0, ccMin = 255, cc = 0;
        /* Slot 0 is the header, so the real code points start at index 1. */
        for(j = 1; j<size; j++) {
            cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
            if(cc>ccMax) {
                ccMax = cc;
            }
            if(cc<ccMin) {
                ccMin = cc;
            }
            *(cpPointer+j) = table->elements[i]->codePoints[j];
        }
        /*
         * Header: "all combining classes equal" flag in bit 8, maximum
         * combining class in the low byte. The conditional must be
         * parenthesised: `<<` binds tighter than `?:`, so the previous
         * `(ccMin==ccMax)?1:0 << 8` evaluated to 1 or 0, never shifted the
         * flag, and OR-ed it into bit 0 of ccMax instead.
         */
        *cpPointer = (UChar)((((ccMin==ccMax)?1:0) << 8) | ccMax);

        uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
        /* Re-point contraction CEs at their flattened offsets. */
        for(j = 0; j<size; j++) {
            if(isCntTableElement(*(CEPointer+j))) {
                *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
            }
        }
        cpPointer += size;
        CEPointer += size;
    }

    // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
    // start of the flat file). However, what is done below is just wrong and it affects building of
    // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
    // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
    // feasible.
    uint32_t CE;
    for(i = 0; i<=0x10FFFF; i++) {
        /*CE = ucmpe32_get(table->mapping, i);*/
        CE = utrie_get32(table->mapping, i, NULL);
        if(isCntTableElement(CE)) {
            CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
            /*ucmpe32_set(table->mapping, i, CE);*/
            utrie_set32(table->mapping, i, CE);
        }
    }

    return table->position;
}
/**
 * Simple recursive implementation of permutation.
 * TODO: optimize
 *
 * Every permutation of the code points of `source` is stored in `result`,
 * keyed by the permutation string, with a heap-allocated UnicodeString copy
 * as the value.
 *
 * @param source    the string to find permutations for
 * @param skipZeros when true, a code point with canonical combining class
 *                  zero that is not at index 0 is never picked as the leading
 *                  code point of a permutation
 * @param result    hashtable that receives the permutations
 * @param status    ICU error code; nothing is done if it already indicates
 *                  failure, and it is set to U_MEMORY_ALLOCATION_ERROR when
 *                  a string allocation fails
 */
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
    if(U_FAILURE(status)) {
        return;
    }

    // Base case: zero or one code point, so the only permutation is the
    // string itself. The cheap length() <= 2 test comes first (a single
    // supplementary code point occupies two code units) to avoid counting
    // code points on every call.
    if (source.length() <= 2 && source.countChar32() <= 1) {
        UnicodeString *single = new UnicodeString(source);
        if (single == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        result->put(source, single, status);
        return;
    }

    // Scratch table for the permutations of "source minus one code point";
    // it owns its values and is emptied for each candidate.
    Hashtable subpermute(status);
    if(U_FAILURE(status)) {
        return;
    }
    subpermute.setValueDeleter(uprv_deleteUObject);

    // Try each code point as the leading one and permute the rest recursively.
    UChar32 cp;
    for (int32_t i = 0; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);

        // Optimization: a code point with canonical combining class zero
        // (other than the first) is not moved to the front.
        if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
            continue;
        }

        subpermute.removeAll();

        // replace() is destructive, so operate on a copy of source with the
        // current code point removed.
        UnicodeString remainder = source;
        permute(remainder.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
        if(U_FAILURE(status)) {
            return;
        }

        // Prefix the current code point to every permutation of the remainder.
        int32_t pos = UHASH_FIRST;
        for (const UHashElement *entry = subpermute.nextElement(pos);
             entry != NULL;
             entry = subpermute.nextElement(pos)) {
            UnicodeString *tail = (UnicodeString *)(entry->value.pointer);
            UnicodeString *combined = new UnicodeString(cp);
            if (combined == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            combined->append(*tail);
            result->put(*combined, combined, status);
        }
    }
}
/* Thin adapter: returns the result of ICU's u_getCombiningClass() for
 * `unicode`, widened to unsigned int. */
static unsigned int
hb_icu_get_combining_class (hb_codepoint_t unicode)
{
  const unsigned int combining_class = u_getCombiningClass (unicode);
  return combining_class;
}
U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(UChar32 c, UProperty which) { UErrorCode errorCode; if(which<UCHAR_BINARY_START) { return 0; /* undefined */ } else if(which<UCHAR_BINARY_LIMIT) { return (int32_t)u_hasBinaryProperty(c, which); } else if(which<UCHAR_INT_START) { return 0; /* undefined */ } else if(which<UCHAR_INT_LIMIT) { switch(which) { case UCHAR_BIDI_CLASS: return (int32_t)u_charDirection(c); case UCHAR_BLOCK: return (int32_t)ublock_getCode(c); #if !UCONFIG_NO_NORMALIZATION case UCHAR_CANONICAL_COMBINING_CLASS: return u_getCombiningClass(c); #endif case UCHAR_DECOMPOSITION_TYPE: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK); case UCHAR_EAST_ASIAN_WIDTH: return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT; case UCHAR_GENERAL_CATEGORY: return (int32_t)u_charType(c); case UCHAR_JOINING_GROUP: return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); case UCHAR_JOINING_TYPE: return ubidi_getJoiningType(GET_BIDI_PROPS(), c); case UCHAR_LINE_BREAK: return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT; case UCHAR_NUMERIC_TYPE: { int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1)); return UPROPS_NTV_GET_TYPE(ntv); } case UCHAR_SCRIPT: errorCode=U_ZERO_ERROR; return (int32_t)uscript_getScript(c, &errorCode); case UCHAR_HANGUL_SYLLABLE_TYPE: { /* see comments on gcbToHst[] above */ int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; if(gcb<LENGTHOF(gcbToHst)) { return gcbToHst[gcb]; } else { return U_HST_NOT_APPLICABLE; } } #if !UCONFIG_NO_NORMALIZATION case UCHAR_NFD_QUICK_CHECK: case UCHAR_NFKD_QUICK_CHECK: case UCHAR_NFC_QUICK_CHECK: case UCHAR_NFKC_QUICK_CHECK: return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); case UCHAR_LEAD_CANONICAL_COMBINING_CLASS: return getFCD16(c)>>8; case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS: return getFCD16(c)&0xff; #endif case 
UCHAR_GRAPHEME_CLUSTER_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; case UCHAR_SENTENCE_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT; case UCHAR_WORD_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT; default: return 0; /* undefined */ } } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
/* Forwarding wrapper: returns u_getCombiningClass(c) unchanged. */
uint8_t __hs_u_getCombiningClass(UChar32 c)
{
    const uint8_t combiningClass = u_getCombiningClass(c);
    return combiningClass;
}