void
CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UChar *p, *q;
    int32_t i, j, k;

    /* sort the data */
    int32_t unfoldLength=unfold.length();
    int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
    UChar *unfoldBuffer=unfold.getBuffer(-1);
    uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                errorCode=U_BUFFER_OVERFLOW_ERROR;
                return;
            }

            /* move following rows up one */
            --unfoldRows;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=unfoldBuffer;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }

    unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
}
bool pelet::UCharBufferClass::OpenString(const UnicodeString& code) {
	Close();
	int length = code.length();
	if (length > 0) {
		LineNumber = 1;
		UChar* buf = new UChar[length + 1];
		u_memmove(buf, code.getBuffer(), length);
		buf[length] = '\0';
		Buffer = buf;
		//Buffer = code.getTerminatedBuffer();
		Current = Buffer;
		TokenStart = Buffer;
		Marker = Buffer;
		Limit = Buffer + length + 1;
	}
	return length > 0;
}
void  pelet::UCharBufferedFileClass::RemoveLeadingSlackSpace() {
	CharacterPos += TokenStart - Buffer;

	// copy any good content to the beginning of the buffer
	// since Limit points to the last character and not PAST the string, we do +1 
	int goodCount = Limit - TokenStart + 1;
	int currentIndex = Current - TokenStart;
	int markerIndex = Marker - TokenStart;

	u_memmove(Buffer, TokenStart, goodCount);
	
	// make everything point to the beginning of the buffer
	// also move limit back to the end of the unread content
	TokenStart = Buffer;
	Current = Buffer + currentIndex;
	Marker = Buffer + markerIndex;

}
Exemple #4
0
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
 * so we don't get bitten by preflight bugs again.  We can be reasonably efficient
 * without two separate code paths, this code isn't that performance-critical.
 *
 * This code is general enough to deal with patterns that have a prefix or swap the
 * language and remainder components, since we gave developers enough rope to do such
 * things if they futz with the pattern data.  But since we don't give them a way to
 * specify a pattern for arbitrary combinations of components, there's not much use in
 * that.  I don't think our data includes such patterns, the only variable I know if is
 * whether there is a space before the open paren, or not.  Oh, and zh uses different
 * chars than the standard open/close paren (which ja and ko use, btw).
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
{
    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
    static const UChar defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
    static const int32_t defaultSub0Pos = 0;
    static const int32_t defaultSub1Pos = 5;

    int32_t length; /* of formatted result */

    const UChar *separator;
    int32_t sepLen = 0;
    const UChar *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;
    
    UChar formatOpenParen         = 0x0028; // (
    UChar formatReplaceOpenParen  = 0x005B; // [
    UChar formatCloseParen        = 0x0029; // )
    UChar formatReplaceCloseParen = 0x005D; // ]

    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
                              a lang component in the locale */
    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
                              any other component in the locale */
    UBool retry = FALSE; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    {
        UErrorCode status = U_ZERO_ERROR;
        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
                                                             NULL, &status);

        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);

        ures_close(dspbundle);
        ures_close(locbundle);
    }

    /* If we couldn't find any data, then use the defaults */
    if(sepLen == 0) {
       separator = defaultSeparator;
    }
    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
     * here since we are trying to build the display string in place in the dest buffer,
     * and to handle it as a pattern would entail having separate storage for the
     * substrings that need to be combined (the first of which may be the result of
     * previous such combinations). So for now we continue to treat the portion between
     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
     * that is before {0} or after {1} (no existing separator pattern has any such thing).
     * This is similar to how pattern is handled below.
     */
    {
        UChar *p0=u_strstr(separator, sub0);
        UChar *p1=u_strstr(separator, sub1);
        if (p0==NULL || p1==NULL || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        separator = (const UChar *)p0 + subLen;
        sepLen = p1 - separator;
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
        pattern=defaultPattern;
        patLen=defaultPatLen;
        sub0Pos=defaultSub0Pos;
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
        UChar *p0=u_strstr(pattern, sub0);
        UChar *p1=u_strstr(pattern, sub1);
        if (p0==NULL || p1==NULL) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        sub0Pos=p0-pattern;
        sub1Pos=p1-pattern;
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
        if (u_strchr(pattern, 0xFF08) != NULL) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
        }
    }

    /* We loop here because there is one case in which after the first pass we could need to
     * reextract the data.  If there's initial padding before the first element, we put in
     * the padding and then write that element.  If it turns out there's no second element,
     * we didn't need the padding.  If we do need the data (no preflight), and the first element
     * would have fit but for the padding, we need to reextract.  In this case (only) we
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
        UChar* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
        UEnumeration* kenum = NULL; /* keyword enumeration */

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
            if(destCapacity >= sub0Pos) {
                while (patPos < sub0Pos) {
                    *p++ = pattern[patPos++];
                }
            } else {
                patPos=sub0Pos;
            }
            length=sub0Pos;
        } else {
            length=0;
        }

        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
            UBool subdone = FALSE; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
            int32_t cap=destCapacity-length;
            if (cap <= 0) {
                cap=0;
            } else {
                p=dest+length;
            }

            if (subi == langi) { /* {0}*/
                if(haveLang) {
                    langPos=length;
                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
                    length+=langLen;
                    haveLang=langLen>0;
                }
                subdone=TRUE;
            } else { /* {1} */
                if(!haveRest) {
                    subdone=TRUE;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    switch(resti++) {
                        case 0:
                            restPos=length;
                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 1:
                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 2:
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
                            kenum = uloc_openKeywords(locale, pErrorCode);
                            /* fall through */
                        default: {
                            const char* kw=uenum_next(kenum, &len, pErrorCode);
                            if (kw == NULL) {
                                uenum_close(kenum);
                                len=0; /* mark that we didn't add a component */
                                subdone=TRUE;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
                                if(len) {
                                    if(len < cap) {
                                        p[len]=0x3d; /* '=', assume we'll need it */
                                    }
                                    len+=1;

                                    /* adjust for call to get keyword */
                                    cap-=len;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p+=len;
                                    }
                                }
                                /* reset for call below */
                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                                    *pErrorCode=U_ZERO_ERROR;
                                }
                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
                                                                           p, cap, pErrorCode);
                                if(len) {
                                    if(vlen==0) {
                                        --len; /* remove unneeded '=' */
                                    }
                                    /* restore cap and p to what they were at start */
                                    cap=destCapacity-length;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p=dest+length;
                                    }
                                }
                                len+=vlen; /* total we added for key + '=' + value */
                            }
                        } break;
                    } /* end switch */

                    if (len>0) {
                        /* we addeed a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
                            const UChar * plimit = p + len;
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
                                } else if (*p == formatCloseParen) {
                                    *p = formatReplaceCloseParen;
                                }
                            }
                            for(int32_t i=0;i<sepLen;++i) {
                                *p++=separator[i];
                            }
                        }
                        length+=len+sepLen;
                    } else if(subdone) {
                        /* remove separator if we added it */
                        if (length!=restPos) {
                            length-=sepLen;
                        }
                        restLen=length-restPos;
                        haveRest=restLen>0;
                    }
                }
            }

            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
            }

            if(subdone) {
                if(haveLang && haveRest) {
                    /* append internal portion of pattern, the first time,
                       or last portion of pattern the second time */
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
                    if(length+padLen < destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
                        }
                    } else {
                        patPos+=padLen;
                    }
                    length+=padLen;
                } else if(subi==0) {
                    /* don't have first component, reset for second component */
                    sub0Pos=0;
                    length=0;
                } else if(length>0) {
                    /* true length is the length of just the component we got. */
                    length=haveLang?langLen:restLen;
                    if(dest && sub0Pos!=0) {
                        if (sub0Pos+length<=destCapacity) {
                            /* first component not at start of result,
                               but we have full component in buffer. */
                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
                        } else {
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
                            retry=TRUE;
                        }
                    }
                }

                ++subi; /* move on to next substitution */
            }
        }
    } while(retry);

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
Exemple #5
0
/* private function used for buffering input */
void
ufile_fill_uchar_buffer(UFILE *f)
{
    UErrorCode  status;
    const char  *mySource;
    const char  *mySourceEnd;
    UChar       *myTarget;
    int32_t     bufferSize;
    int32_t     maxCPBytes;
    int32_t     bytesRead;
    int32_t     availLength;
    int32_t     dataSize;
    char        charBuffer[UFILE_CHARBUFFER_SIZE];
    u_localized_string *str;

    if (f->fFile == NULL) {
        /* There is nothing to do. It's a string. */
        return;
    }

    str = &f->str;
    dataSize = (int32_t)(str->fLimit - str->fPos);
    if (f->fFileno == 0 && dataSize > 0) {
        /* Don't read from stdin too many times. There is still some data. */
        return;
    }

    /* shift the buffer if it isn't empty */
    if(dataSize != 0) {
        u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */
    }


    /* record how much buffer space is available */
    availLength = UFILE_UCHARBUFFER_SIZE - dataSize;

    /* Determine the # of codepage bytes needed to fill our UChar buffer */
    /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
    maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);

    /* Read in the data to convert */
    if (f->fFileno == 0) {
        /* Special case. Read from stdin one line at a time. */
        char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
        bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
    }
    else {
        /* A normal file */
        bytesRead = (int32_t)fread(charBuffer,
            sizeof(char),
            ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
            f->fFile);
    }

    /* Set up conversion parameters */
    status      = U_ZERO_ERROR;
    mySource    = charBuffer;
    mySourceEnd = charBuffer + bytesRead;
    myTarget    = f->fUCBuffer + dataSize;
    bufferSize  = UFILE_UCHARBUFFER_SIZE;

    if(f->fConverter != NULL) { /* We have a valid converter */
        /* Perform the conversion */
        ucnv_toUnicode(f->fConverter,
            &myTarget,
            f->fUCBuffer + bufferSize,
            &mySource,
            mySourceEnd,
            NULL,
            (UBool)(feof(f->fFile) != 0),
            &status);

    } else { /*weiv: do the invariant conversion */
        u_charsToUChars(mySource, myTarget, bytesRead);
        myTarget += bytesRead;
    }

    /* update the pointers into our array */
    str->fPos    = str->fBuffer;
    str->fLimit  = myTarget;
}
static void 
storeMappingData(){

    int32_t pos = UHASH_FIRST;
    const UHashElement* element = NULL;
    ValueStruct* value  = NULL;
    int32_t codepoint = 0;
    int32_t elementCount = 0;
    int32_t writtenElementCount = 0;
    int32_t mappingLength = 1; /* minimum mapping length */
    int32_t oldMappingLength = 0;
    uint16_t trieWord =0;
    int32_t limitIndex = 0;

    if (hashTable == NULL) {
        return;
    }
    elementCount = uhash_count(hashTable);

	/*initialize the mapping data */
    mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);

    while(writtenElementCount < elementCount){

        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
            
            codepoint = element->key.integer;
            value = (ValueStruct*)element->value.pointer;
            
            /* store the start of indexes */
            if(oldMappingLength != mappingLength){
                /* Assume that index[] is used according to the enums defined */
                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
                }
                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
                   
                    limitIndex = currentIndex;
                     
                }
                oldMappingLength = mappingLength;
            }

            if(value->length == mappingLength){
                uint32_t savedTrieWord = 0;
                trieWord = currentIndex << 2;
                /* turn on the 2nd bit to signal that the following bits contain an index */
                trieWord += 0x02;
            
                if(trieWord > _SPREP_TYPE_THRESHOLD){
                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }
                /* figure out if the code point has type already stored */
                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
                if(savedTrieWord!=0){
                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
                        /* turn on the first bit in trie word */
                        trieWord += 0x01;
                    }else{
                        /* 
                         * the codepoint has value something other than prohibited
                         * and a mapping .. error! 
                         */
                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
                        exit(U_ILLEGAL_ARGUMENT_ERROR); 
                    } 
                } 
                
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
                }

                /* written the trie word for the codepoint... increment the count*/
                writtenElementCount++;

                /* sanity check are we exceeding the max number allowed */
                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", 
                        currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
                }

                /* copy the mapping data */
                /* write the length */
                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
                     /* the cast here is safe since we donot expect the length to be > 65535 */
                     mappingData[currentIndex++] = (uint16_t) mappingLength;
                }
                /* copy the contents to mappindData array */
                u_memmove(mappingData+currentIndex, value->mapping, value->length);
                currentIndex += value->length;
                if (currentIndex > mappingDataCapacity) {
                    /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */
                    fprintf(stderr, "gensprep, fatal error at %s, %d.  Aborting.\n", __FILE__, __LINE__);
                    exit(U_INTERNAL_PROGRAM_ERROR);
                }
            }
        }
        mappingLength++;
        pos = -1;
    }
    /* set the last length for range check */
    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
    }else{
        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
    }
    
}
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UStringCaseMapper *stringCaseMapper,
             UErrorCode *pErrorCode) {
    UChar buffer[300];
    UChar *temp;

    int32_t destLength;

    /* check argument values */
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
        src==NULL ||
        srcLength<-1
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the string length */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /* check for overlapping source and destination */
    if( dest!=NULL &&
        ((src>=dest && src<(dest+destCapacity)) ||
         (dest>=src && dest<(src+srcLength)))
    ) {
        /* overlap: provide a temporary destination buffer and later copy the result */
        if(destCapacity<=UPRV_LENGTHOF(buffer)) {
            /* the stack buffer is large enough */
            temp=buffer;
        } else {
            /* allocate a buffer */
            temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
            if(temp==NULL) {
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
        }
    } else {
        temp=dest;
    }

    destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
    if(temp!=dest) {
        /* copy the result string to the destination buffer */
        if(destLength>0) {
            int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
            if(copyLength>0) {
                u_memmove(dest, temp, copyLength);
            }
        }
        if(temp!=buffer) {
            uprv_free(temp);
        }
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
Exemple #8
0
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, 
                                    UChar* dest, int32_t destCapacity, 
                                    UBool allowUnassigned,
                                    UParseError* parseError,
                                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }
    
    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString b1String;
    UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
    int32_t b1Len;

    int32_t b1Index = 0;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;

    b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
    b1String.releaseBuffer(b1Len);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = b1String.getBuffer(b1Len);
        status = U_ZERO_ERROR; // reset error
        b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
        b1String.releaseBuffer(b1Len);
    }

    if(U_FAILURE(status)){
        b1Len = 0;
        goto CLEANUP;
    }


    for(; b1Index<b1Len; ){

        UChar32 ch = 0;

        U16_NEXT(b1, b1Index, b1Len, ch);

        if(prohibited.contains(ch) && ch!=0x0020){
            status = U_IDNA_PROHIBITED_ERROR;
            b1Len = 0;
            goto CLEANUP;
        }

        direction = u_charDirection(ch);
        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
        }
    }       
    
    // satisfy 2
    if( leftToRight == TRUE && rightToLeft == TRUE){
        status = U_IDNA_CHECK_BIDI_ERROR;
        b1Len = 0;
        goto CLEANUP;
    }

    //satisfy 3
    if( rightToLeft == TRUE && 
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
       ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        return FALSE;
    }

    if(b1Len <= destCapacity){
        u_memmove(dest, b1, b1Len);
    }

CLEANUP:
    return u_terminateUChars(dest, destCapacity, b1Len, &status);
}
static void
makeUnfoldData() {
    static const UChar
        iDot[2]=        { 0x69, 0x307 };

    UChar *p, *q;
    int32_t i, j, k;
    UErrorCode errorCode;

    /*
     * add a case folding that we missed because it's conditional:
     * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
     */
    addUnfolding(0x130, iDot, 2);

    /* sort the data */
    errorCode=U_ZERO_ERROR;
    uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                exit(U_BUFFER_OVERFLOW_ERROR);
            }

            /* move following rows up one */
            --unfoldRows;
            unfoldTop-=UGENCASE_UNFOLD_WIDTH;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=(UChar *)unfold;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }
}