static void TestAppend() { static const UChar32 codePoints[]={ 0x61, 0xdf, 0x901, 0x3040, 0xac00, 0xd800, 0xdbff, 0xdcde, 0xdffd, 0xe000, 0xffff, 0x10000, 0x12345, 0xe0021, 0x10ffff, 0x110000, 0x234567, 0x7fffffff, -1, -1000, 0, 0x400 }; static const UChar expectUnsafe[]={ 0x61, 0xdf, 0x901, 0x3040, 0xac00, 0xd800, 0xdbff, 0xdcde, 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00, 0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */ /* none from this line */ 0, 0x400 }, expectSafe[]={ 0x61, 0xdf, 0x901, 0x3040, 0xac00, 0xd800, 0xdbff, 0xdcde, 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00, 0xd848, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */ /* none from this line */ 0, 0x400 }; UChar buffer[100]; UChar32 c; int32_t i, length; UBool isError, expectIsError, wrongIsError; length=0; for(i=0; i<LENGTHOF(codePoints); ++i) { c=codePoints[i]; if(c<0 || 0x10ffff<c) { continue; /* skip non-code points for U16_APPEND_UNSAFE */ } U16_APPEND_UNSAFE(buffer, length, c); } if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) { log_err("U16_APPEND_UNSAFE did not generate the expected output\n"); } length=0; wrongIsError=FALSE; for(i=0; i<LENGTHOF(codePoints); ++i) { c=codePoints[i]; expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); isError=FALSE; U16_APPEND(buffer, length, LENGTHOF(buffer), c, isError); wrongIsError|= isError!=expectIsError; } if(wrongIsError) { log_err("U16_APPEND did not set isError correctly\n"); } if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) { log_err("U16_APPEND did not generate the expected output\n"); } }
/**
 * Appends one code point to the internal UTF-16 staging buffer.
 * When fewer than two free code units remain in fBuffer, the buffered
 * text is first handed to fDest->addLiteral() and the buffer is reset.
 */
inline void append(UChar32 x) {
    const bool hasRoomForTwoUnits = fIdx < UPRV_LENGTHOF(fBuffer) - 1;
    if (!hasRoomForTwoUnits) {
        // Flush what has been collected so far and start over at index 0.
        fDest->addLiteral(fBuffer, 0, fIdx);
        fIdx = 0;
    }
    U16_APPEND_UNSAFE(fBuffer, fIdx, x);
}
/*
 * Parse a list of hexadecimal code points and store them as a UTF-16
 * string in dest[destCapacity].
 *
 * The list is read from s after skipping whitespace (u_skipWhitespace());
 * it ends at a ';' or at the terminating NUL.
 * Each number must be followed by invariant whitespace, ';' or NUL,
 * and must be below 0x110000; otherwise U_PARSE_ERROR is set and 0 is returned.
 *
 * @param s            input text, must not be NULL
 * @param dest         output buffer, may be NULL only if destCapacity==0
 * @param destCapacity capacity of dest in UChars, must not be negative
 * @param pFirst       if not NULL, receives the first code point,
 *                     or 0xffffffff if the list is empty
 * @param pErrorCode   in/out error code; set to
 *                     U_STRING_NOT_TERMINATED_WARNING if the string fits exactly,
 *                     U_BUFFER_OVERFLOW_ERROR if it does not fit
 * @return The length of the string in numbers of UChars
 *         (the full length even if it did not fit into dest).
 */
U_CAPI int32_t U_EXPORT2
u_parseString(const char *s,
              UChar *dest, int32_t destCapacity,
              uint32_t *pFirst,
              UErrorCode *pErrorCode) {
    char *end;
    unsigned long parsed;
    uint32_t value;
    int32_t destLength;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(pFirst!=NULL) {
        *pFirst=0xffffffff;
    }

    destLength=0;
    for(;;) {
        s=u_skipWhitespace(s);
        if(*s==';' || *s==0) {
            /* end of the list: terminate if possible and report the length */
            if(destLength<destCapacity) {
                dest[destLength]=0;
            } else if(destLength==destCapacity) {
                *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
            } else {
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
            return destLength;
        }

        /*
         * read one code point
         *
         * Range-check the full-width result before narrowing it:
         * where unsigned long is wider than 32 bits, a cast to uint32_t
         * first would turn an over-long number like 100000041 into 0x41
         * and let it pass the limit check.
         */
        parsed=uprv_strtoul(s, &end, 16);
        if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || parsed>=0x110000) {
            *pErrorCode=U_PARSE_ERROR;
            return 0;
        }
        value=(uint32_t)parsed;

        /* store the first code point */
        if(pFirst!=NULL) {
            *pFirst=value;
            pFirst=NULL;
        }

        /* append it to the destination array, or just count if it does not fit */
        if((destLength+U16_LENGTH(value))<=destCapacity) {
            U16_APPEND_UNSAFE(dest, destLength, value);
        } else {
            destLength+=U16_LENGTH(value);
        }

        /* go to the following characters */
        s=end;
    }
}
/*
 * Invariant-character conversion of char* -> UChar* with backslash-escape
 * parsing. Returns the number of UChars the full result needs (which may
 * exceed destCapacity), or 0 with an empty dest if an escape is malformed.
 * dest may be NULL to only count.
 */
U_CAPI int32_t U_EXPORT2
u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
    const char *literalStart = src; /* start of the not-yet-copied literal run */
    int32_t outLength = 0;          /* UChars produced/needed so far */

    while (*src != 0) {
        int32_t escapeLength = 0;
        UChar32 codePoint;

        /* '\\' intentionally written as compiler-specific
         * character constant to correspond to compiler-specific
         * char* constants. */
        if (*src != '\\') {
            ++src;
            continue;
        }

        /* copy the literal run that precedes this escape */
        if (src != literalStart) {
            if (dest != NULL) {
                _appendUChars(dest + outLength, destCapacity - outLength,
                              literalStart, (int32_t)(src - literalStart));
            }
            outLength += (int32_t)(src - literalStart);
        }

        ++src; /* advance past '\\' */
        codePoint = (UChar32)u_unescapeAt(_charPtr_charAt, &escapeLength,
                                          (int32_t)uprv_strlen(src), (void*)src);
        if (escapeLength == 0) {
            /* malformed escape: return an empty result */
            if (dest != NULL && destCapacity > 0) {
                *dest = 0;
            }
            return 0;
        }
        src += escapeLength; /* advance past escape seq. */

        if (dest == NULL || U16_LENGTH(codePoint) > (destCapacity - outLength)) {
            /* no room (or counting only): just account for the units */
            outLength += U16_LENGTH(codePoint);
        } else {
            U16_APPEND_UNSAFE(dest, outLength, codePoint);
        }
        literalStart = src;
    }

    /* copy the trailing literal run, if any */
    if (src != literalStart) {
        if (dest != NULL) {
            _appendUChars(dest + outLength, destCapacity - outLength,
                          literalStart, (int32_t)(src - literalStart));
        }
        outLength += (int32_t)(src - literalStart);
    }

    /* NUL-terminate only if there is room for it */
    if (dest != NULL && outLength < destCapacity) {
        dest[outLength] = 0;
    }
    return outLength;
}
// HarfBuzz glyph callback: encodes |unicode| as UTF-16 and asks Core Text
// for the matching glyph. Writes the first glyph to |*glyph| and returns
// true on success; returns false (leaving |*glyph| untouched) otherwise.
// |hbFont|, |variationSelector| and |userData| are not used.
static hb_bool_t getGlyph(hb_font_t* hbFont, void* fontData, hb_codepoint_t unicode, hb_codepoint_t variationSelector, hb_codepoint_t* glyph, void* userData)
{
    FontPlatformData* platformData = reinterpret_cast<FontPlatformData*>(fontData);
    CTFontRef font = platformData->ctFont();

    UniChar utf16[4];
    CGGlyph glyphs[4];
    size_t unitCount = 0;
    U16_APPEND_UNSAFE(utf16, unitCount, unicode);

    if (CTFontGetGlyphsForCharacters(font, utf16, glyphs, unitCount)) {
        *glyph = glyphs[0];
        return true;
    }
    return false;
}
// Builds the name "a-<c>" and checks it with testIsPotentialCustomElementName().
// Code points up to 0xFF go through an 8-bit string, all others through a
// UTF-16 string (one or two code units followed by a terminator).
static void testIsPotentialCustomElementNameChar(UChar32 c, bool expected)
{
    LChar latin1Name[] = "a-X";
    UChar utf16Name[] = {'a', '-', 'X', '\0', '\0'};
    AtomicString name;

    if (c > 0xFF) {
        size_t end = 2;
        U16_APPEND_UNSAFE(utf16Name, end, c);
        utf16Name[end] = 0;
        name = utf16Name;
    } else {
        latin1Name[2] = c;
        name = latin1Name;
    }

    testIsPotentialCustomElementName(name, expected);
}
/*
 * Prepare the mappings of a sorted table for the fromUnicode extension table.
 *
 * - Clears the MBCS_FROM_U_EXT_FLAG bits from non-negative flags.
 * - Keeps only mappings with flag 0, 1, 4, or flag 2 with a single byte;
 *   toUnicode fallbacks and other SUB mappings are irrelevant here.
 * - Overwrites table->reverseMap with the indexes of the kept mappings
 *   (this destroys the previous reverseMap contents).
 * - For kept mappings with more than one code point, recodes all but the
 *   first code point in place into 16-bit Unicode string form, and sets
 *   uLen to the resulting 16-bit length (the first code point counts as 2).
 *
 * @return the number of indexes stored in table->reverseMap
 */
static int32_t
prepareFromUMappings(UCMTable *table) {
    UCMapping *const mappings=table->mappings;
    int32_t *const map=table->reverseMap;
    const int32_t count=table->mappingsLength;
    int32_t index, kept;

    /*
     * the mappings are sorted lexically, so walk them directly
     * rather than going through the map
     */
    kept=0;
    for(index=0; index<count; ++index) {
        UCMapping *m=mappings+index;
        int8_t flag=m->f;
        UChar32 *codePoints;
        UChar *units;
        int32_t cpIndex, unitIndex;

        if(flag>=0) {
            flag&=MBCS_FROM_U_EXT_MASK;
            m->f=flag;
        }

        if(!(flag==0 || flag==1 || (flag==2 && m->bLen==1) || flag==4)) {
            continue; /* not relevant for fromUnicode */
        }
        map[kept++]=index;

        if(m->uLen<=1) {
            continue; /* a single code point stays as it is */
        }

        /* recode all but the first code point to 16-bit Unicode */
        codePoints=UCM_GET_CODE_POINTS(table, m);
        units=(UChar *)codePoints; /* destructive in-place recoding */

        /* the first 16-bit unit after the first code point is at 16-bit index 2 */
        unitIndex=2;
        for(cpIndex=1; cpIndex<m->uLen; ++cpIndex) {
            /* read the code point before its storage is overwritten */
            UChar32 c=codePoints[cpIndex];
            U16_APPEND_UNSAFE(units, unitIndex, c);
        }

        /* counts the first code point always at 2 */
        m->uLen=(int8_t)unitIndex;
    }

    return kept;
}
// Lower-cases a UTF-16 string in place, one code point at a time.
// Each code point is read with U16_NEXT, mapped through u_tolower() and
// written back at the (separately tracked) write position.
void ICUUnicodeSupport::_toLowerCase<2>(StringHolder<2> _str)
{
    if (_str.empty())
        return;

    uint16_t* units = &_str[0];
    const int32_t unitCount = _str.length();
    int32_t readPos = 0;
    int32_t writePos = 0;

    while (readPos != unitCount)
    {
        UChar32 codePoint;
        U16_NEXT(units, readPos, unitCount, codePoint);
        codePoint = u_tolower(codePoint);
        U16_APPEND_UNSAFE(units, writePos, codePoint);
    }
}
/**
 * Decode a BOCU-1 byte sequence to a UTF-16 string.
 * Does not check for overflows, but otherwise useful function.
 *
 * Each byte is fed to decodeBocu1(); a result below -1 is treated as an
 * encoding error, a result >=0 is appended as a code point, and -1 produces
 * no output (presumably "more bytes needed" - confirm against decodeBocu1()).
 *
 * @param p pointer to input BOCU-1 bytes
 * @param length number of input bytes
 * @param s point to output UTF-16 string array
 * @return number of UChar code units output, or -1 on an encoding error
 */
static int32_t
readString(const uint8_t *p, int32_t length, UChar *s) {
    Bocu1Rx rx={ 0, 0, 0 };
    int32_t c, i, sLength;

    i=sLength=0;
    while(i<length) {
        c=decodeBocu1(&rx, p[i++]);
        if(c<-1) {
            /* %ld requires a long argument; i is an int32_t */
            log_err("error: readString detects encoding error at string index %ld\n", (long)i);
            return -1;
        }
        if(c>=0) {
            U16_APPEND_UNSAFE(s, sLength, c);
        }
    }
    return sLength;
}
/*
 * Writes the sample character of a script (the low 21 bits of its script
 * properties) to dest as a UTF-16 string and terminates it via
 * u_terminateUChars(). A sample character of 0 yields an empty string.
 * If the string does not fit, nothing is written and the required length
 * is still passed on to u_terminateUChars().
 */
U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    /* valid arguments: non-negative capacity, and a buffer whenever capacity>0 */
    if(!(capacity >= 0 && (capacity == 0 || dest != NULL))) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    int32_t length = 0;
    if(sampleChar != 0) {
        length = U16_LENGTH(sampleChar);
        if(capacity >= length) {
            int32_t written = 0;
            U16_APPEND_UNSAFE(dest, written, sampleChar);
        }
    }
    return u_terminateUChars(dest, capacity, length, pErrorCode);
}
// Converts the case of a null-terminated UTF-16 string in place, applying
// caseFunction to each code point.
//
// Note the semantics of this function are broken, since it's
// possible that changing the case of a string could increase
// its length, but there's no way to handle such a situation.
// When a BMP code point converts to a supplementary one and writing it
// would overwrite the next unread code unit, conversion stops and the
// string is terminated at the current write position.
static void
doCaseConvert(
            XMLCh*          convertString,
            FunctionType    caseFunction)
{
    const unsigned int  len = XMLString::stringLen(convertString);

    size_t  readPos = 0;
    size_t  writePos = 0;

    while (readPos < len)
    {
        // Get the next Unicode code point.
        UChar32     original;
        U16_NEXT_UNSAFE(convertString, readPos, original);

        // Convert the code point
        const UChar32   converted = caseFunction(original);

        // One unit was read but two would be written, with no slack
        // between the write and read positions.
        const bool  grewFromOneToTwoUnits =
            !U_IS_BMP(converted) && U_IS_BMP(original);

        if (grewFromOneToTwoUnits && readPos - writePos == 1)
        {
            // We do not have room to convert the
            // character without overwriting the next
            // character, so we will just stop.
            break;
        }

        U16_APPEND_UNSAFE(convertString, writePos, converted);
    }

    convertString[writePos] = 0;
}
/**
 * Performs character mirroring.
 *
 * Copies the source text to the destination, replacing each code point that
 * sits on an odd embedding level (per <code>ubidi_getLevelAt()</code>) with
 * its <code>u_charMirror()</code> counterpart. Does nothing unless
 * <code>UBIDI_DO_MIRRORING</code> is set in the reordering options; sets
 * <code>U_BUFFER_OVERFLOW_ERROR</code> if the destination is smaller than
 * the source. On success the destination length is set to the source length
 * and the reordering options are reset to <code>UBIDI_REORDER_DEFAULT</code>.
 *
 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure.
 * @param pErrorCode Pointer to the error code value.
 *
 * @return Whether or not this function modifies the text. Besides the return
 *         value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>.
 */
static UBool
action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
    UChar32 c;
    uint32_t i = 0, j = 0;
    if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
        return FALSE;
    }
    if (pTransform->destSize < pTransform->srcLength) {
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }
    /* A pre-tested loop so that an empty source touches neither buffer. */
    while (i < pTransform->srcLength) {
        UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1;
        U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
        /*
         * Mirror before appending: U16_APPEND_UNSAFE evaluates its code point
         * argument several times, so it should be given a plain variable
         * rather than an expression with a function call.
         */
        if (isOdd) {
            c = u_charMirror(c);
        }
        U16_APPEND_UNSAFE(pTransform->dest, j, c);
    }

    *pTransform->pDestLength = pTransform->srcLength;
    pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
    return TRUE;
}
/*
 * Add one row to the unfold[] table: the case folding string s (padded with
 * zeros to UGENCASE_UNFOLD_STRING_WIDTH units) followed by the code point c
 * in UTF-16. Each row occupies UGENCASE_UNFOLD_WIDTH units starting at
 * unfoldTop. Exits the program if the string is too long or the table is full.
 */
static void
addUnfolding(UChar32 c, const UChar *s, int32_t length) {
    int32_t i;

    if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
        fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
                (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    /*
     * A whole row of UGENCASE_UNFOLD_WIDTH units is written below
     * (u_memset() plus the appended code point), so there must be room
     * for a full row, not just for the string part of it.
     */
    if(unfoldTop > (LENGTHOF(unfold) - UGENCASE_UNFOLD_WIDTH)) {
        fprintf(stderr, "gencase error: too many multi-character case foldings\n");
        exit(U_BUFFER_OVERFLOW_ERROR);
    }

    /* clear the row, then fill in the string and the code point */
    u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
    u_memcpy(unfold+unfoldTop, s, length);

    i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
    U16_APPEND_UNSAFE(unfold, i, c);

    ++unfoldRows;
    unfoldTop+=UGENCASE_UNFOLD_WIDTH;
}
static int toIDNA2003(const UStringPrepProfile *prep, UChar32 c, icu::UnicodeString &destString) { UChar src[2]; int32_t srcLength=0; U16_APPEND_UNSAFE(src, srcLength, c); UChar *dest; int32_t destLength; dest=destString.getBuffer(32); if(dest==NULL) { return FALSE; } UErrorCode errorCode=U_ZERO_ERROR; destLength=usprep_prepare(prep, src, srcLength, dest, destString.getCapacity(), USPREP_DEFAULT, NULL, &errorCode); destString.releaseBuffer(destLength); if(errorCode==U_STRINGPREP_PROHIBITED_ERROR) { return -1; } else { // Returns FALSE=0 for U_STRINGPREP_UNASSIGNED_ERROR and processing errors, // TRUE=1 if c is valid or mapped. return U_SUCCESS(errorCode); } }
static void testTrieIteration(const char *testName, const UTrie *trie, const CheckRange checkRanges[], int32_t countCheckRanges) { UChar s[100]; uint32_t values[30]; const UChar *p, *limit; uint32_t value; UChar32 c; int32_t i, length, countValues; UChar c2; /* write a string */ length=countValues=0; for(i=0; i<countCheckRanges; ++i) { c=checkRanges[i].limit; if(c!=0) { --c; U16_APPEND_UNSAFE(s, length, c); values[countValues++]=checkRanges[i].value; } } limit=s+length; /* try forward */ p=s; i=0; while(p<limit) { c=c2=0x33; if(trie->data32!=NULL) { UTRIE_NEXT32(trie, p, limit, c, c2, value); } else { UTRIE_NEXT16(trie, p, limit, c, c2, value); } if(value!=values[i]) { log_err("error: wrong value from UTRIE_NEXT(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n", testName, c, c2, value, values[i]); } if( c2==0 ? c!=*(p-1) : !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1) ) { log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n", testName, c, c2); continue; } if(c2!=0) { int32_t offset; if(trie->data32==NULL) { value=UTRIE_GET16_FROM_LEAD(trie, c); offset=trie->getFoldingOffset(value); if(offset>0) { value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2); } else { value=trie->initialValue; } } else { value=UTRIE_GET32_FROM_LEAD(trie, c); offset=trie->getFoldingOffset(value); if(offset>0) { value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2); } else { value=trie->initialValue; } } if(value!=values[i]) { log_err("error: wrong value from UTRIE_GETXX_FROM_OFFSET_TRAIL(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n", testName, c, c2, value, values[i]); } } if(c2!=0) { value=0x44; if(trie->data32==NULL) { UTRIE_GET16_FROM_PAIR(trie, c, c2, value); } else { UTRIE_GET32_FROM_PAIR(trie, c, c2, value); } if(value!=values[i]) { log_err("error: wrong value from UTRIE_GETXX_FROM_PAIR(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n", testName, c, c2, value, values[i]); } } ++i; } /* try backward */ p=limit; i=countValues; while(s<p) { --i; 
c=c2=0x33; if(trie->data32!=NULL) { UTRIE_PREVIOUS32(trie, s, p, c, c2, value); } else { UTRIE_PREVIOUS16(trie, s, p, c, c2, value); } if(value!=values[i]) { log_err("error: wrong value from UTRIE_PREVIOUS(%s)(U+%04lx, U+%04lx): 0x%lx instead of 0x%lx\n", testName, c, c2, value, values[i]); } if( c2==0 ? c!=*p: !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*p || c2!=*(p+1) ) { log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n", testName, c, c2); } } }
/*
 * internal function
 *
 * Compare two UTF-16 strings under case folding.
 *
 * The strings are compared code unit by code unit. Only when two code units
 * differ is the full case folding (ucase_toFullFolding()) of the current code
 * point looked up; if there is one, it is copied into a local buffer and the
 * comparison continues from that buffer ("one level down") before returning
 * to the original string.
 *
 * @param s1, length1  first string; length -1 means NUL-terminated
 * @param s2, length2  second string; length -1 means NUL-terminated
 * @param options      passed to ucase_toFullFolding(); also tested here for
 *                     _STRNCMP_STYLE (a NUL ends a string even with an explicit
 *                     length) and U_COMPARE_CODE_POINT_ORDER
 * @param pErrorCode   in/out error code; if it already indicates a failure,
 *                     the function returns 0 without comparing
 * @return 0 if the strings are equal under folding, -1 if string 1 ends first,
 *         1 if string 2 ends first, otherwise the difference of the first
 *         differing code units (fixed up for code point order if requested)
 */
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             uint32_t options,
             UErrorCode *pErrorCode) {
    const UCaseProps *csp;

    /* current-level start/limit - s1/s2 as current */
    const UChar *start1, *start2, *limit1, *limit2;

    /* case folding variables */
    const UChar *p;
    int32_t length;

    /* stacks of previous-level start/current/limit */
    CmpEquivLevel stack1[2], stack2[2];

    /* case folding buffers, only use current-level start/limit */
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

    /* track which is the current level per string */
    int32_t level1, level2;

    /* current code units, and code points for lookups */
    UChar32 c1, c2, cp1, cp2;

    /* no argument error checking because this itself is not an API */

    /*
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
     * otherwise this function would have to behave exactly as uprv_strCompare()
     */
    csp=ucase_getSingleton();
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* initialize: a NULL limit marks a NUL-terminated string */
    start1=s1;
    if(length1==-1) {
        limit1=NULL;
    } else {
        limit1=s1+length1;
    }

    start2=s2;
    if(length2==-1) {
        limit2=NULL;
    } else {
        limit2=s2+length2;
    }

    level1=level2=0;
    c1=c2=-1;

    /* comparison loop */
    for(;;) {
        /*
         * here a code unit value of -1 means "get another code unit"
         * below it will mean "this source is finished"
         */

        if(c1<0) {
            /* get next code unit from string 1, post-increment */
            for(;;) {
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level1==0) {
                        /* end of the original string */
                        c1=-1;
                        break;
                    }
                } else {
                    ++s1;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level1;
                    start1=stack1[level1].start;
                } while(start1==NULL);
                s1=stack1[level1].s;
                limit1=stack1[level1].limit;
            }
        }

        if(c2<0) {
            /* get next code unit from string 2, post-increment */
            for(;;) {
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level2==0) {
                        /* end of the original string */
                        c2=-1;
                        break;
                    }
                } else {
                    ++s2;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level2;
                    start2=stack2[level2].start;
                } while(start2==NULL);
                s2=stack2[level2].s;
                limit2=stack2[level2].limit;
            }
        }

        /*
         * compare c1 and c2
         * either variable c1, c2 is -1 only if the corresponding string is finished
         */
        if(c1==c2) {
            if(c1<0) {
                return 0;   /* c1==c2==-1 indicating end of strings */
            }
            c1=c2=-1;       /* make us fetch new code units */
            continue;
        } else if(c1<0) {
            return -1;      /* string 1 ends before string 2 */
        } else if(c2<0) {
            return 1;       /* string 2 ends before string 1 */
        }
        /* c1!=c2 && c1>=0 && c2>=0 */

        /* get complete code points for c1, c2 for lookups if either is a surrogate */
        cp1=c1;
        if(U_IS_SURROGATE(c1)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c1)) {
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
                }
            } else /* isTrail(c1) */ {
                /* s1 was post-incremented, so the unit before c1 is at s1-2 */
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
                }
            }
        }

        cp2=c2;
        if(U_IS_SURROGATE(c2)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c2)) {
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
                }
            } else /* isTrail(c2) */ {
                /* s2 was post-incremented, so the unit before c2 is at s2-2 */
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
                }
            }
        }

        /*
         * go down one level for each string
         * continue with the main loop as soon as there is a real change
         */

        if( level1==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
        ) {
            /* cp1 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c1)) {
                if(U_IS_SURROGATE_LEAD(c1)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s1;
                } else /* isTrail(c1) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s2;
                    c2=*(s2-1);
                }
            }

            /* push current level pointers */
            stack1[0].start=start1;
            stack1[0].s=s1;
            stack1[0].limit=limit1;
            ++level1;

            /* copy the folding result to fold1[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold1, p, length);
            } else {
                /* "length" is itself the folded code point: encode it as UTF-16 */
                int32_t i=0;
                U16_APPEND_UNSAFE(fold1, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start1=s1=fold1;
            limit1=fold1+length;

            /* get ready to read from decomposition, continue with loop */
            c1=-1;
            continue;
        }

        if( level2==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
        ) {
            /* cp2 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c2)) {
                if(U_IS_SURROGATE_LEAD(c2)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s2;
                } else /* isTrail(c2) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s1;
                    c1=*(s1-1);
                }
            }

            /* push current level pointers */
            stack2[0].start=start2;
            stack2[0].s=s2;
            stack2[0].limit=limit2;
            ++level2;

            /* copy the folding result to fold2[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold2, p, length);
            } else {
                /* "length" is itself the folded code point: encode it as UTF-16 */
                int32_t i=0;
                U16_APPEND_UNSAFE(fold2, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start2=s2=fold2;
            limit2=fold2+length;

            /* get ready to read from decomposition, continue with loop */
            c2=-1;
            continue;
        }

        /*
         * no decomposition/case folding, max level for both sides:
         * return difference result
         *
         * code point order comparison must not just return cp1-cp2
         * because when single surrogates are present then the surrogate pairs
         * that formed cp1 and cp2 may be from different string indexes
         *
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
         *
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
         * so we have slightly different pointer/start/limit comparisons here
         */

        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
            if(
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c1-=0x2800;
            }

            if(
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c2-=0x2800;
            }
        }

        return c1-c2;
    }
}
/*
 * Store the mapping of one code point for the StringPrep data.
 *
 * Depending on the mapping, one of three things happens:
 * - an empty mapping stores (_SPREP_MAX_INDEX_VALUE << 2) in the trie;
 * - a mapping to a single 16-bit unit whose distance from the code point lies
 *   within SPREP_DELTA_RANGE_NEGATIVE_LIMIT..SPREP_DELTA_RANGE_POSITIVE_LIMIT
 *   stores that delta (shifted left by 2) in the trie;
 * - everything else is converted to UTF-16 and put into the hash table,
 *   keyed by the code point, for the mapping table.
 *
 * Exits the program on inconsistent input or internal errors.
 *
 * @param codepoint the source code point
 * @param mapping   the code points that codepoint maps to
 * @param length    number of code points in mapping
 * @param type      the StringPrep type recorded with a hash table entry
 * @param status    in/out error code, used for the hash table operations
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){

    UChar* map = NULL;
    int16_t adjustedLen=0, i, j;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
        uhash_setValueDeleter(hashTable, valueDeleter);
    }

    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /* turn on the first bit in trie word */
            trieWord += 0x01;
        }else{
            /*
             * the codepoint has value something other than prohibited
             * and a mapping .. error!
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
    }

    /* figure out the real length */
    for(i=0; i<length; i++){
        adjustedLen += U16_LENGTH(mapping[i]);
    }

    if(adjustedLen == 0){
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /*
         * calculate the delta
         *
         * Compute it in 32 bits and range-check it before narrowing:
         * truncating the difference to 16 bits first could wrap a large
         * distance (for example from a supplementary code point) into the
         * permitted range and store a wrong delta.
         */
        int32_t delta = (int32_t)codepoint - (int32_t)mapping[0];
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){

            /* multiply rather than shift: left-shifting a negative value is undefined */
            trieWord = (uint16_t)(delta * 4);

            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);
                }
                /* value is set so just return */
                return;
            }
        }
        /*
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    /* convert the mapping to UTF-16; the extra unit stays 0 as a terminator */
    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
    if(map == NULL){
        fprintf(stderr,"Could not allocate memory for the mapping.\n");
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    for (i=0, j=0; i<length; i++) {
        U16_APPEND_UNSAFE(map, j, mapping[i]);
    }

    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    if(value == NULL){
        fprintf(stderr,"Could not allocate memory for the mapping.\n");
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    value->mapping = map;
    value->type    = type;
    value->length  = adjustedLen;
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }
    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}
/*
 * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
 * semantically write RTL runs in reverse and later reverse them again.
 * Instead, we actually write them in forward order to begin with.
 * However, if the RTL run was to be mirrored, we need to mirror here now
 * since the implicit second reversal must not do it.
 * It looks strange to do mirroring in LTR output, but it is only because
 * we are writing RTL output in reverse.
 *
 * Copies one run from src to dest in forward order, optionally mirroring
 * characters (UBIDI_DO_MIRRORING) and/or dropping BiDi control characters
 * (UBIDI_REMOVE_BIDI_CONTROLS).
 *
 * @return the length of the output; if dest is too small,
 *         *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR and the
 *         required length is returned
 */
static int32_t
doWriteForward(const UChar *src, int32_t srcLength,
               UChar *dest, int32_t destSize,
               uint16_t options,
               UErrorCode *pErrorCode) {
    /*
     * All loops below are do/while loops that process at least one code unit,
     * so an empty run must not reach them.
     */
    if(srcLength<=0) {
        return 0;
    }

    /* optimize for several combinations of options */
    switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
    case 0: {
        /* simply copy the LTR run to the destination */
        int32_t length=srcLength;
        if(destSize<length) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        do {
            *dest++=*src++;
        } while(--length>0);
        return srcLength;
    }
    case UBIDI_DO_MIRRORING: {
        /* do mirroring */
        int32_t i=0, j=0;
        UChar32 c;

        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        do {
            U16_NEXT(src, i, srcLength, c);
            c=u_charMirror(c);
            U16_APPEND_UNSAFE(dest, j, c);
        } while(i<srcLength);
        return srcLength;
    }
    case UBIDI_REMOVE_BIDI_CONTROLS: {
        /* copy the LTR run and remove any BiDi control characters */
        int32_t remaining=destSize;
        UChar c;
        do {
            c=*src++;
            if(!IS_BIDI_CONTROL_CHAR(c)) {
                if(--remaining<0) {
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

                    /* preflight the length */
                    while(--srcLength>0) {
                        c=*src++;
                        if(!IS_BIDI_CONTROL_CHAR(c)) {
                            --remaining;
                        }
                    }
                    return destSize-remaining;
                }
                *dest++=c;
            }
        } while(--srcLength>0);
        return destSize-remaining;
    }
    default: {
        /* remove BiDi control characters and do mirroring */
        int32_t remaining=destSize;
        int32_t i, j=0;
        UChar32 c;
        do {
            /* read one code point; i is its length in code units */
            i=0;
            U16_NEXT(src, i, srcLength, c);
            src+=i;
            srcLength-=i;
            if(!IS_BIDI_CONTROL_CHAR(c)) {
                remaining-=i;
                if(remaining<0) {
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

                    /* preflight the length */
                    while(srcLength>0) {
                        c=*src++;
                        if(!IS_BIDI_CONTROL_CHAR(c)) {
                            --remaining;
                        }
                        --srcLength;
                    }
                    return destSize-remaining;
                }
                c=u_charMirror(c);
                U16_APPEND_UNSAFE(dest, j, c);
            }
        } while(srcLength>0);
        return j;
    }
    } /* end of switch */
}
/*
 * Copy one run from src to dest in reverse order of code points.
 *
 * @return the length of the output; if dest is too small,
 *         *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR and the
 *         required length is returned
 */
static int32_t
doWriteReverse(const UChar *src, int32_t srcLength,
               UChar *dest, int32_t destSize,
               uint16_t options,
               UErrorCode *pErrorCode) {
    /*
     * RTL run -
     *
     * RTL runs need to be copied to the destination in reverse order
     * of code points, not code units, to keep Unicode characters intact.
     *
     * The general strategy for this is to read the source text
     * in backward order, collect all code units for a code point
     * (and optionally following combining characters, see below),
     * and copy all these code units in ascending order
     * to the destination for this run.
     *
     * Several options request whether combining characters
     * should be kept after their base characters,
     * whether BiDi control characters should be removed, and
     * whether characters should be replaced by their mirror-image
     * equivalent Unicode characters.
     */
    int32_t i, j;
    UChar32 c;

    /*
     * All loops below are do/while loops that step back over at least one
     * code unit, so an empty run must not reach them
     * (they would read before the start of src).
     */
    if(srcLength<=0) {
        return 0;
    }

    /* optimize for several combinations of options */
    switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
    case 0:
        /*
         * With none of the "complicated" options set, the destination
         * run will have the same length as the source run,
         * and there is no mirroring and no keeping combining characters
         * with their base characters.
         */
        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        destSize=srcLength;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* collect code units for one base character */
            U16_BACK_1(src, 0, srcLength);

            /* copy this base character */
            j=srcLength;
            do {
                *dest++=src[j++];
            } while(j<i);
        } while(srcLength>0);
        break;
    case UBIDI_KEEP_BASE_COMBINING:
        /*
         * Here, too, the destination
         * run will have the same length as the source run,
         * and there is no mirroring.
         * We do need to keep combining characters with their base characters.
         */
        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        destSize=srcLength;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* collect code units and modifier letters for one base character */
            do {
                U16_PREV(src, 0, srcLength, c);
            } while(srcLength>0 && IS_COMBINING(u_charType(c)));

            /* copy this "user character" */
            j=srcLength;
            do {
                *dest++=src[j++];
            } while(j<i);
        } while(srcLength>0);
        break;
    default:
        /*
         * With several "complicated" options set, this is the most
         * general and the slowest copying of an RTL run.
         * We will do mirroring, remove BiDi controls, and
         * keep combining characters with their base characters
         * as requested.
         */
        if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
            i=srcLength;
        } else {
            /* we need to find out the destination length of the run,
               which will not include the BiDi control characters */
            int32_t length=srcLength;
            UChar ch;

            i=0;
            do {
                ch=*src++;
                if(!IS_BIDI_CONTROL_CHAR(ch)) {
                    ++i;
                }
            } while(--length>0);
            /* rewind to the start of the run */
            src-=srcLength;
        }

        if(destSize<i) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return i;
        }
        destSize=i;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* collect code units for one base character */
            U16_PREV(src, 0, srcLength, c);
            if(options&UBIDI_KEEP_BASE_COMBINING) {
                /* collect modifier letters for this base character */
                while(srcLength>0 && IS_COMBINING(u_charType(c))) {
                    U16_PREV(src, 0, srcLength, c);
                }
            }

            if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
                /* do not copy this BiDi control character */
                continue;
            }

            /* copy this "user character" */
            j=srcLength;
            if(options&UBIDI_DO_MIRRORING) {
                /* mirror only the base character */
                int32_t k=0;
                c=u_charMirror(c);
                U16_APPEND_UNSAFE(dest, k, c);
                /* skip the k units just written, in both dest and the source segment */
                dest+=k;
                j+=k;
            }
            while(j<i) {
                *dest++=src[j++];
            }
        } while(srcLength>0);
        break;
    } /* end of switch */

    return destSize;
}