/**
 * Sorts the unfold table and merges rows whose string columns are equal, so
 * that every distinct string keeps a single row collecting the code points of
 * all merged rows.
 *
 * Layout as used below: the table lives in `unfold` as consecutive rows of
 * UGENCASE_UNFOLD_WIDTH UChars. Row 0 is left out of the sort and receives
 * the final row count at index UCASE_UNFOLD_ROWS. In every other row the
 * first UGENCASE_UNFOLD_STRING_WIDTH UChars are the compared string and the
 * columns after them (up to UGENCASE_UNFOLD_CP_WIDTH) hold code points,
 * ended by a 0 when not all columns are used.
 *
 * @param errorCode In/out error code. Nothing is done if it already indicates
 *                  a failure. Left as set by uprv_sortArray() if sorting
 *                  fails, or set to U_BUFFER_OVERFLOW_ERROR when merged rows
 *                  hold more than UGENCASE_UNFOLD_CP_WIDTH code points.
 */
void
CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UChar *p, *q;
    int32_t i, j, k;

    /* sort the data */
    int32_t unfoldLength=unfold.length();
    int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
    UChar *unfoldBuffer=unfold.getBuffer(-1);
    uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        /* merging relies on equal strings being adjacent; do not continue unsorted */
        unfold.releaseBuffer(unfoldLength);
        return;
    }

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            /* find the first unused code point column of the current row */
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            /* append the code points of the following row */
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n", (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                errorCode=U_BUFFER_OVERFLOW_ERROR;
                /* close the buffer opened above before giving up */
                unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
                return;
            }

            /* move following rows up one */
            --unfoldRows;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
            /* p and i stay put: the row that moved up is compared with the same row next */
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    /* record the number of remaining data rows in row 0 */
    unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=unfoldBuffer;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }

    /* shrink the string to row 0 plus the rows that are left */
    unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
}
bool pelet::UCharBufferClass::OpenString(const UnicodeString& code) { Close(); int length = code.length(); if (length > 0) { LineNumber = 1; UChar* buf = new UChar[length + 1]; u_memmove(buf, code.getBuffer(), length); buf[length] = '\0'; Buffer = buf; //Buffer = code.getTerminatedBuffer(); Current = Buffer; TokenStart = Buffer; Marker = Buffer; Limit = Buffer + length + 1; } return length > 0; }
void pelet::UCharBufferedFileClass::RemoveLeadingSlackSpace() { CharacterPos += TokenStart - Buffer; // copy any good content to the beginning of the buffer // since Limit points to the last character and not PAST the string, we do +1 int goodCount = Limit - TokenStart + 1; int currentIndex = Current - TokenStart; int markerIndex = Marker - TokenStart; u_memmove(Buffer, TokenStart, goodCount); // make everything point to the beginning of the buffer // also move limit back to the end of the unread content TokenStart = Buffer; Current = Buffer + currentIndex; Marker = Buffer + markerIndex; }
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
 * so we don't get bitten by preflight bugs again.  We can be reasonably efficient
 * without two separate code paths, this code isn't that performance-critical.
 *
 * This code is general enough to deal with patterns that have a prefix or swap the
 * language and remainder components, since we gave developers enough rope to do such
 * things if they futz with the pattern data.  But since we don't give them a way to
 * specify a pattern for arbitrary combinations of components, there's not much use in
 * that.  I don't think our data includes such patterns, the only variable I know of is
 * whether there is a space before the open paren, or not.  Oh, and zh uses different
 * chars than the standard open/close paren (which ja and ko use, btw).
 */
/**
 * Builds the display name of `locale` in the language of `displayLocale`.
 *
 * The name is assembled in place in `dest` from a pattern with two
 * placeholders: one receives the display language, the other receives the
 * remaining components (script, country, variant, then each keyword as
 * "keyword=value"), joined by a separator string. Pattern and separator are
 * read from the resource data of `displayLocale`; built-in defaults
 * "{0} ({1})" and "{0}, {1}" are used when none are found. If only one of
 * the two parts turns out to exist, the result is that part alone.
 *
 * @param locale        locale whose name is wanted
 * @param displayLocale locale in which the name is to be expressed
 * @param dest          output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity  capacity of dest in UChars, must not be negative
 * @param pErrorCode    in/out error code; the function returns 0 at once if
 *                      it is NULL or already a failure. Set to
 *                      U_ILLEGAL_ARGUMENT_ERROR for bad dest/destCapacity or
 *                      for a separator/pattern lacking {0} or {1}.
 * @return the value returned by u_terminateUChars() for the computed length
 *         of the name, or 0 on the early error exits
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
{
    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
    static const UChar defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
    static const int32_t defaultSub0Pos = 0;
    static const int32_t defaultSub1Pos = 5;

    int32_t length; /* of formatted result */

    const UChar *separator;
    int32_t sepLen = 0;
    const UChar *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;

    UChar formatOpenParen         = 0x0028; // (
    UChar formatReplaceOpenParen  = 0x005B; // [
    UChar formatCloseParen        = 0x0029; // )
    UChar formatReplaceCloseParen = 0x005D; // ]

    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
                              a lang component in the locale */
    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
                              any other component in the locale */
    UBool retry = FALSE; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    {
        /* a private status: failing to load the data only means the defaults are used */
        UErrorCode status = U_ZERO_ERROR;
        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
                                                             NULL, &status);

        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);

        ures_close(dspbundle);
        ures_close(locbundle);
    }

    /* If we couldn't find any data, then use the defaults */
    if(sepLen == 0) {
        separator = defaultSeparator;
    }
    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
     * here since we are trying to build the display string in place in the dest buffer,
     * and to handle it as a pattern would entail having separate storage for the
     * substrings that need to be combined (the first of which may be the result of
     * previous such combinations). So for now we continue to treat the portion between
     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
     * that is before {0} or after {1} (no existing separator pattern has any such thing).
     * This is similar to how pattern is handled below.
     */
    {
        UChar *p0=u_strstr(separator, sub0);
        UChar *p1=u_strstr(separator, sub1);
        if (p0==NULL || p1==NULL || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        separator = (const UChar *)p0 + subLen;
        sepLen = p1 - separator;
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
        pattern=defaultPattern;
        patLen=defaultPatLen;
        sub0Pos=defaultSub0Pos;
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
        UChar *p0=u_strstr(pattern, sub0);
        UChar *p1=u_strstr(pattern, sub1);
        if (p0==NULL || p1==NULL) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        sub0Pos=p0-pattern;
        sub1Pos=p1-pattern;
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
        if (u_strchr(pattern, 0xFF08) != NULL) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
        }
    }

    /* We loop here because there is one case in which after the first pass we could need to
     * reextract the data.  If there's initial padding before the first element, we put in
     * the padding and then write that element.  If it turns out there's no second element,
     * we didn't need the padding.  If we do need the data (no preflight), and the first element
     * would have fit but for the padding, we need to reextract.  In this case (only) we
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
        UChar* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
        UEnumeration* kenum = NULL; /* keyword enumeration */

        /* Every pass starts from the same assumptions. On the first pass these
         * assignments change nothing. On a repeated pass they matter: without
         * clearing retry the loop condition below would stay true forever, and
         * the branches below expect both parts to be assumed present until a
         * pass has looked for them. */
        retry=FALSE;
        haveLang=TRUE;
        haveRest=TRUE;

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
            if(destCapacity >= sub0Pos) {
                while (patPos < sub0Pos) {
                    *p++ = pattern[patPos++];
                }
            } else {
                patPos=sub0Pos;
            }
            length=sub0Pos;
        } else {
            length=0;
        }

        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
            UBool subdone = FALSE; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
            int32_t cap=destCapacity-length;
            if (cap <= 0) {
                cap=0;
            } else {
                p=dest+length;
            }

            if (subi == langi) { /* {0}*/
                if(haveLang) {
                    langPos=length;
                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
                    length+=langLen;
                    haveLang=langLen>0;
                }
                subdone=TRUE;
            } else { /* {1} */
                if(!haveRest) {
                    subdone=TRUE;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    /* resti selects the next component: script, country, variant, then keywords */
                    switch(resti++) {
                        case 0:
                            restPos=length;
                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 1:
                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 2:
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
                            kenum = uloc_openKeywords(locale, pErrorCode);
                            /* fall through */
                        default: {
                            const char* kw=uenum_next(kenum, &len, pErrorCode);
                            if (kw == NULL) {
                                uenum_close(kenum);
                                len=0; /* mark that we didn't add a component */
                                subdone=TRUE;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
                                if(len) {
                                    if(len < cap) {
                                        p[len]=0x3d; /* '=', assume we'll need it */
                                    }
                                    len+=1;

                                    /* adjust for call to get keyword */
                                    cap-=len;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p+=len;
                                    }
                                }
                                /* reset for call below */
                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                                    *pErrorCode=U_ZERO_ERROR;
                                }
                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
                                                                           p, cap, pErrorCode);
                                if(len) {
                                    if(vlen==0) {
                                        --len; /* remove unneeded '=' */
                                    }
                                    /* restore cap and p to what they were at start */
                                    cap=destCapacity-length;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p=dest+length;
                                    }
                                }
                                len+=vlen; /* total we added for key + '=' + value */
                            }
                        } break;
                    } /* end switch */

                    if (len>0) {
                        /* we added a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
                            const UChar * plimit = p + len;
                            /* parens inside a component are swapped for brackets */
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
                                } else if (*p == formatCloseParen) {
                                    *p = formatReplaceCloseParen;
                                }
                            }
                            for(int32_t i=0;i<sepLen;++i) {
                                *p++=separator[i];
                            }
                        }
                        length+=len+sepLen;
                    } else if(subdone) {
                        /* remove separator if we added it */
                        if (length!=restPos) {
                            length-=sepLen;
                        }
                        restLen=length-restPos;
                        haveRest=restLen>0;
                    }
                }
            }

            /* overflow is not an error here: length keeps counting for preflighting */
            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
            }

            if(subdone) {
                if(haveLang && haveRest) {
                    /* append internal portion of pattern, the first time,
                       or last portion of pattern the second time */
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
                    if(length+padLen < destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
                        }
                    } else {
                        patPos+=padLen;
                    }
                    length+=padLen;
                } else if(subi==0) {
                    /* don't have first component, reset for second component */
                    sub0Pos=0;
                    length=0;
                } else if(length>0) {
                    /* true length is the length of just the component we got. */
                    length=haveLang?langLen:restLen;
                    if(dest && sub0Pos!=0) {
                        if (sub0Pos+length<=destCapacity) {
                            /* first component not at start of result,
                               but we have full component in buffer. */
                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
                        } else {
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
                            retry=TRUE;
                        }
                    }
                }

                ++subi; /* move on to next substitution */
            }
        }
    } while(retry);

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
/* private function used for buffering input */
/*
 * Refills the UChar buffer of f from its underlying file.
 *
 * Unread UChars (between str.fPos and str.fLimit) are first moved to the
 * start of f->fUCBuffer. Then one chunk of bytes is read from f->fFile and
 * appended behind them, converted with f->fConverter or, when there is no
 * converter, with u_charsToUChars(). Finally str.fPos is reset to
 * str.fBuffer and str.fLimit is set behind the last UChar written.
 *
 * Nothing happens when f has no file, or when f->fFileno is 0 and unread
 * data is still available.
 */
void
ufile_fill_uchar_buffer(UFILE *f)
{
    char                rawBytes[UFILE_CHARBUFFER_SIZE];
    u_localized_string *ustr;
    UErrorCode          convStatus;
    const char         *src;
    const char         *srcLimit;
    UChar              *dst;
    int32_t             pending;     /* unread UChars still in the buffer */
    int32_t             freeUnits;   /* UChars of room behind the unread data */
    int32_t             unitCost;    /* divisor turning free UChars into a byte count */
    int32_t             byteBudget;
    int32_t             chunk;       /* number of bytes requested from the file */
    int32_t             gotBytes;

    if (f->fFile == NULL) {
        /* There is nothing to do. It's a string. */
        return;
    }

    ustr = &f->str;
    pending = (int32_t)(ustr->fLimit - ustr->fPos);
    if (pending > 0 && f->fFileno == 0) {
        /* Don't read from stdin too many times. There is still some data. */
        return;
    }

    /* keep what has not been consumed yet, at the front of the buffer */
    if (pending != 0) {
        u_memmove(f->fUCBuffer, ustr->fPos, pending); /* not accessing beyond memory */
    }

    freeUnits = UFILE_UCHARBUFFER_SIZE - pending;

    /* how many codepage bytes may be read for the free space;
       without a converter the invariant conversion is used, one byte per UChar */
    if (f->fConverter != NULL) {
        unitCost = 2 * ucnv_getMinCharSize(f->fConverter);
    } else {
        unitCost = 1;
    }
    byteBudget = freeUnits / unitCost;
    chunk = ufmt_min(byteBudget, UFILE_CHARBUFFER_SIZE);

    /* read the raw bytes */
    if (f->fFileno != 0) {
        /* A normal file */
        gotBytes = (int32_t)fread(rawBytes, sizeof(char), chunk, f->fFile);
    } else if (fgets(rawBytes, chunk, f->fFile) != NULL) {
        /* Special case. Read from stdin one line at a time. */
        gotBytes = (int32_t)uprv_strlen(rawBytes);
    } else {
        gotBytes = 0;
    }

    /* convert them behind the unread data */
    convStatus = U_ZERO_ERROR;
    src = rawBytes;
    srcLimit = rawBytes + gotBytes;
    dst = f->fUCBuffer + pending;

    if (f->fConverter == NULL) {
        /* no converter: do the invariant conversion */
        u_charsToUChars(src, dst, gotBytes);
        dst += gotBytes;
    } else {
        /* flush the converter once the end of the file has been reached */
        ucnv_toUnicode(f->fConverter,
                       &dst,
                       f->fUCBuffer + UFILE_UCHARBUFFER_SIZE,
                       &src,
                       srcLimit,
                       NULL,
                       (UBool)(feof(f->fFile) != 0),
                       &convStatus);
    }

    /* update the pointers into our array */
    ustr->fPos = ustr->fBuffer;
    ustr->fLimit = dst;
}
/*
 * Copies the mapping strings collected in hashTable into the newly allocated
 * mappingData array and stores, for every mapped code point, a trie word in
 * sprepTrie that carries the index of its mapping.
 *
 * The hash table is scanned once per mapping length, starting with length 1,
 * so mappings end up grouped by length. The index at which each group starts
 * is recorded in indexes[]. Mappings longer than
 * _SPREP_MAX_INDEX_TOP_LENGTH are stored with their length in front.
 *
 * Does nothing if hashTable is NULL. Any inconsistency or failure is fatal:
 * a message is printed to stderr and the process exits.
 */
static void
storeMappingData(){

    int32_t pos = UHASH_FIRST;
    const UHashElement* element = NULL;
    ValueStruct* value = NULL;
    int32_t codepoint = 0;
    int32_t elementCount = 0;
    int32_t writtenElementCount = 0;
    int32_t mappingLength = 1; /* minimum mapping length */
    int32_t oldMappingLength = 0;
    uint16_t trieWord =0;
    int32_t limitIndex = 0;
    int32_t unitsNeeded = 0; /* array slots the current mapping will occupy */

    if (hashTable == NULL) {
        return;
    }
    elementCount = uhash_count(hashTable);

    /*initialize the mapping data */
    mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);
    if(mappingData == NULL && elementCount > 0){
        /* without this the first store below would go through a NULL pointer */
        fprintf(stderr, "Could not allocate memory for the mapping data.\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    while(writtenElementCount < elementCount){

        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){

            codepoint = element->key.integer;
            value = (ValueStruct*)element->value.pointer;

            /* store the start of indexes */
            if(oldMappingLength != mappingLength){
                /* Assume that index[] is used according to the enums defined */
                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
                }
                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){

                    limitIndex = currentIndex;

                }
                oldMappingLength = mappingLength;
            }

            /* only the mappings of the current length are handled in this pass */
            if(value->length == mappingLength){
                uint32_t savedTrieWord = 0;
                trieWord = currentIndex << 2;
                /* turn on the 2nd bit to signal that the following bits contain an index */
                trieWord += 0x02;

                if(trieWord > _SPREP_TYPE_THRESHOLD){
                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }
                /* figure out if the code point has type already stored */
                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
                if(savedTrieWord!=0){
                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
                        /* turn on the first bit in trie word */
                        trieWord += 0x01;
                    }else{
                        /*
                         * the codepoint has value something other than prohibited
                         * and a mapping .. error!
                         */
                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
                        exit(U_ILLEGAL_ARGUMENT_ERROR);
                    }
                }

                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);
                }

                /* written the trie word for the codepoint... increment the count*/
                writtenElementCount++;

                /* sanity check are we exceeding the max number allowed */
                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n",
                            currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
                }

                /* make sure the mapping fits BEFORE anything is written to the array */
                unitsNeeded = value->length;
                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                    ++unitsNeeded; /* one more slot for the length */
                }
                if (currentIndex + unitsNeeded > mappingDataCapacity) {
                    /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */
                    fprintf(stderr, "gensprep, fatal error at %s, %d.  Aborting.\n", __FILE__, __LINE__);
                    exit(U_INTERNAL_PROGRAM_ERROR);
                }

                /* copy the mapping data */
                /* write the length */
                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                     /* the cast here is safe since we donot expect the length to be > 65535 */
                     mappingData[currentIndex++] = (uint16_t) mappingLength;
                }
                /* copy the contents to mappindData array */
                u_memmove(mappingData+currentIndex, value->mapping, value->length);
                currentIndex += value->length;
            }
        }
        /* next pass: one unit longer, iterating the hash table from its start again */
        mappingLength++;
        pos = -1;
    }
    /* set the last length for range check */
    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
    }else{
        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
    }

}
/**
 * Runs stringCaseMapper on src and delivers the result in dest, also when
 * the source and destination buffers overlap.
 *
 * With overlapping buffers the mapper writes into a temporary buffer (a
 * stack array, or a heap block if destCapacity is larger than the array),
 * and the result is copied to dest afterwards.
 *
 * @param csm               passed through to stringCaseMapper
 * @param dest              output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity      capacity of dest in UChars, must not be negative
 * @param src               input string, must not be NULL
 * @param srcLength         length of src, or -1 if src is NUL-terminated
 * @param stringCaseMapper  function doing the actual mapping
 * @param pErrorCode        in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                          for bad arguments and U_MEMORY_ALLOCATION_ERROR if
 *                          the temporary buffer cannot be allocated
 * @return the value returned by u_terminateUChars() for the mapper's result
 *         length, or 0 on the early error exits
 */
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UStringCaseMapper *stringCaseMapper,
             UErrorCode *pErrorCode) {
    /* refuse to work with a pending error or inconsistent arguments */
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL || srcLength<-1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* a NUL-terminated source: measure it */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    UChar stackBuffer[300];
    UChar *target=dest;

    /* does either buffer start inside the other one? */
    if(dest!=NULL &&
       ((src>=dest && src<(dest+destCapacity)) ||
        (dest>=src && dest<(src+srcLength)))) {
        /* yes: map into a temporary buffer, on the stack if that is large enough */
        target= destCapacity<=UPRV_LENGTHOF(stackBuffer) ?
                    stackBuffer :
                    (UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
        if(target==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
    }

    int32_t resultLength=stringCaseMapper(csm, target, destCapacity, src, srcLength, pErrorCode);

    if(target!=dest) {
        /* hand the result over, but never more than dest can take */
        int32_t copyLength= resultLength<destCapacity ? resultLength : destCapacity;
        if(copyLength>0) {
            u_memmove(dest, target, copyLength);
        }
        if(target!=stackBuffer) {
            uprv_free(target);
        }
    }

    return u_terminateUChars(dest, destCapacity, resultLength, pErrorCode);
}
/**
 * Maps src with map(), then checks the mapped text for prohibited code
 * points and for the bidi requirements below, and copies it to dest.
 *
 * Checks performed on the mapped text:
 *  - no code point other than U+0020 may be in the `prohibited` set;
 *  - it must not contain both left-to-right and right-to-left characters;
 *  - if it contains a right-to-left character, both its first and its last
 *    character must be right-to-left.
 *
 * @param src             input string, must not be NULL
 * @param srcLength       length of src, or -1 (values below -1 are rejected)
 * @param dest            output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity    capacity of dest in UChars
 * @param allowUnassigned passed through to map()
 * @param parseError      passed through to map()
 * @param status          in/out error code; nothing is done if it already
 *                        indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR,
 *                        U_MEMORY_ALLOCATION_ERROR, U_IDNA_PROHIBITED_ERROR or
 *                        U_IDNA_CHECK_BIDI_ERROR by the checks in this
 *                        function, or to whatever map() reports.
 * @return the value returned by u_terminateUChars() for the length of the
 *         mapped text; 0 whenever a check fails
 */
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
                                    UChar* dest, int32_t destCapacity,
                                    UBool allowUnassigned,
                                    UParseError* parseError,
                                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString b1String;
    UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
    if(b1==NULL){
        // no writable buffer: do not hand a NULL destination to map()
        status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    int32_t b1Len;

    int32_t b1Index = 0;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;

    b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
    b1String.releaseBuffer(b1Len);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = b1String.getBuffer(b1Len);
        if(b1==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            b1Len = 0;
            goto CLEANUP;
        }
        status = U_ZERO_ERROR; // reset error
        b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
        b1String.releaseBuffer(b1Len);
    }

    if(U_FAILURE(status)){
        b1Len = 0;
        goto CLEANUP;
    }

    // walk the mapped text one code point at a time
    for(; b1Index<b1Len; ){

        UChar32 ch = 0;

        U16_NEXT(b1, b1Index, b1Len, ch);

        if(prohibited.contains(ch) && ch!=0x0020){
            status = U_IDNA_PROHIBITED_ERROR;
            b1Len = 0;
            goto CLEANUP;
        }

        // after the loop, direction holds the direction of the last code point
        direction = u_charDirection(ch);
        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
        }
    }

    // satisfy 2
    if( leftToRight == TRUE && rightToLeft == TRUE){
        status = U_IDNA_CHECK_BIDI_ERROR;
        b1Len = 0;
        goto CLEANUP;
    }

    //satisfy 3
    if( rightToLeft == TRUE &&
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
       ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        // leave through the same exit as the other failed checks
        b1Len = 0;
        goto CLEANUP;
    }

    // nothing to copy for an empty result; dest may be NULL in that case
    if(b1Len > 0 && b1Len <= destCapacity){
        u_memmove(dest, b1, b1Len);
    }

CLEANUP:
    return u_terminateUChars(dest, destCapacity, b1Len, &status);
}
/*
 * Finishes the unfold[] table: adds the one conditional case folding that
 * was not collected before, sorts the rows, and merges rows whose string
 * columns are equal so that each distinct string keeps a single row with the
 * code points of all merged rows.
 *
 * Layout as used below: unfold[] consists of rows of UGENCASE_UNFOLD_WIDTH
 * UChars. Row 0 is left out of the sort and receives the final row count at
 * index UCASE_UNFOLD_ROWS. In every other row the first
 * UGENCASE_UNFOLD_STRING_WIDTH UChars are the compared string and the columns
 * after them (up to UGENCASE_UNFOLD_CP_WIDTH) hold code points, ended by a 0
 * when not all columns are used.
 *
 * unfoldRows and unfoldTop, both declared outside this function, are
 * decreased for every row that is merged away.
 * Errors are fatal: a message is printed to stderr and the process exits.
 */
static void
makeUnfoldData() {
    static const UChar
        iDot[2]=        { 0x69, 0x307 };

    UChar *p, *q;
    int32_t i, j, k;
    UErrorCode errorCode;

    /*
     * add a case folding that we missed because it's conditional:
     * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
     */
    addUnfolding(0x130, iDot, 2);

    /* sort the data */
    errorCode=U_ZERO_ERROR;
    uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        /* merging relies on equal strings being adjacent; do not continue unsorted */
        fprintf(stderr, "gencase error: sorting unfold[] failed with error code %d\n", (int)errorCode);
        exit(errorCode);
    }

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            /* find the first unused code point column of the current row */
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            /* append the code points of the following row */
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n", (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                exit(U_BUFFER_OVERFLOW_ERROR);
            }

            /* move following rows up one */
            --unfoldRows;
            unfoldTop-=UGENCASE_UNFOLD_WIDTH;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
            /* p and i stay put: the row that moved up is compared with the same row next */
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    /* record the number of remaining data rows in row 0 */
    unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=(UChar *)unfold;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }
}