Beispiel #1
0
U_CFUNC void
uprv_syntaxError(const UChar* rules,
                 int32_t pos,
                 int32_t rulesLen,
                 UParseError* parseError){
    if(parseError == NULL){
        return;
    }
    parseError->offset = pos;
    parseError->line = 0 ; // we are not using line numbers

    // for pre-context
    int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    int32_t limit = pos;

    u_memcpy(parseError->preContext,rules+start,limit-start);
    //null terminate the buffer
    parseError->preContext[limit-start] = 0;

    // for post-context; include error rules[pos]
    start = pos;
    limit = start + (U_PARSE_CONTEXT_LEN-1);
    if (limit > rulesLen) {
        limit = rulesLen;
    }
    if (start < rulesLen) {
        u_memcpy(parseError->postContext,rules+start,limit-start);
    }
    //null terminate the buffer
    parseError->postContext[limit-start]= 0;
}
U_CAPI int32_t U_EXPORT2
ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = 0;
    int32_t UCAlen = 0;
    const UChar* ucaRules = 0;
    const UChar *rules = ucol_getRules(coll, &len);
    if(delta == UCOL_FULL_RULES) {
        /* take the UCA rules and append real rules at the end */
        /* UCA rules will be probably coming from the root RB */
        ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
        /*
        UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
        UResourceBundle*  uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
        ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
        ures_close(uca);
        ures_close(cresb);
        */
    }
    if(U_FAILURE(status)) {
        return 0;
    }
    if(buffer!=0 && bufferLen>0){
        *buffer=0;
        if(UCAlen > 0) {
            u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
        }
        if(len > 0 && bufferLen > UCAlen) {
            u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
        }
    }
    return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
}
Beispiel #3
0
/*
 * Verify that the ICU BOCU-1 implementation produces the same results as
 * the reference implementation from the design folder.
 * Generate some texts and convert them with both converters, verifying
 * identical results and roundtripping.
 */
static void
TestBOCU1(void) {
    UChar *text;
    int32_t i, length;

    UConverter *bocu1;
    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    bocu1=ucnv_open("BOCU-1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: unable to open BOCU-1 converter: %s\n", u_errorName(errorCode));
        return;
    }

    text = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));

    /* text 1: each of strings[] once */
    length=0;
    for(i=0; i<LENGTHOF(strings); ++i) {
        u_memcpy(text+length, strings[i].s, strings[i].length);
        length+=strings[i].length;
    }
    roundtripBOCU1(bocu1, 1, text, length);

    /* text 2: each of strings[] twice */
    length=0;
    for(i=0; i<LENGTHOF(strings); ++i) {
        u_memcpy(text+length, strings[i].s, strings[i].length);
        length+=strings[i].length;
        u_memcpy(text+length, strings[i].s, strings[i].length);
        length+=strings[i].length;
    }
    roundtripBOCU1(bocu1, 2, text, length);

    /* text 3: each of strings[] many times (set step vs. |strings| so that all strings are used) */
    length=0;
    for(i=1; length<5000; i+=7) {
        if(i>=LENGTHOF(strings)) {
            i-=LENGTHOF(strings);
        }
        u_memcpy(text+length, strings[i].s, strings[i].length);
        length+=strings[i].length;
    }
    roundtripBOCU1(bocu1, 3, text, length);

    ucnv_close(bocu1);
    free(text);
}
UBool
UCharsTrieBuilder::ensureCapacity(int32_t length) {
    if(uchars==NULL) {
        return FALSE;  // previous memory allocation had failed
    }
    if(length>ucharsCapacity) {
        int32_t newCapacity=ucharsCapacity;
        do {
            newCapacity*=2;
        } while(newCapacity<=length);
        UChar *newUChars=static_cast<UChar *>(uprv_malloc(newCapacity*2));
        if(newUChars==NULL) {
            // unable to allocate memory
            uprv_free(uchars);
            uchars=NULL;
            ucharsCapacity=0;
            return FALSE;
        }
        u_memcpy(newUChars+(newCapacity-ucharsLength),
                 uchars+(ucharsCapacity-ucharsLength), ucharsLength);
        uprv_free(uchars);
        uchars=newUChars;
        ucharsCapacity=newCapacity;
    }
    return TRUE;
}
int32_t
UCharsTrieBuilder::write(const UChar *s, int32_t length) {
    int32_t newLength=ucharsLength+length;
    if(ensureCapacity(newLength)) {
        ucharsLength=newLength;
        u_memcpy(uchars+(ucharsCapacity-ucharsLength), s, length);
    }
    return ucharsLength;
}
Beispiel #6
0
static int32_t
_getStringOrCopyKey(const char *path, const char *locale,
                    const char *tableKey, 
                    const char* subTableKey,
                    const char *itemKey,
                    const char *substitute,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode) {
    const UChar *s = NULL;
    int32_t length = 0;

    if(itemKey==NULL) {
        /* top-level item: normal resource bundle access */
        UResourceBundle *rb;

        rb=ures_open(path, locale, pErrorCode);

        if(U_SUCCESS(*pErrorCode)) {
            s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
            ures_close(rb);
        }
    } else {
        /* Language code should not be a number. If it is, set the error code. */
        if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
            *pErrorCode = U_MISSING_RESOURCE_ERROR;
        } else {
            /* second-level item, use special fallback */
            s=uloc_getTableStringWithFallback(path, locale,
                                               tableKey, 
                                               subTableKey,
                                               itemKey,
                                               &length,
                                               pErrorCode);
        }
    }

    if(U_SUCCESS(*pErrorCode)) {
        int32_t copyLength=uprv_min(length, destCapacity);
        if(copyLength>0 && s != NULL) {
            u_memcpy(dest, s, copyLength);
        }
    } else {
        /* no string from a resource bundle: convert the substitute */
        length=(int32_t)uprv_strlen(substitute);
        u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
        *pErrorCode=U_USING_DEFAULT_WARNING;
    }

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
static inline int32_t
appendString(UChar *dest, int32_t destIndex, int32_t destCapacity,
             const UChar *s, int32_t length) {
    if(length>0) {
        if(length>(INT32_MAX-destIndex)) {
            return -1;  // integer overflow
        }
        if((destIndex+length)<=destCapacity) {
            u_memcpy(dest+destIndex, s, length);
        }
        destIndex+=length;
    }
    return destIndex;
}
Beispiel #8
0
// Sets the source to the new character iterator.
void CollationElementIterator::setText(CharacterIterator& source, 
                                       UErrorCode& status)
{
    if (U_FAILURE(status)) 
        return;

    int32_t length = source.getLength();
    UChar *buffer = NULL;

    if (length == 0) {
        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
        /* test for NULL */
        if (buffer == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        *buffer = 0;
    }
    else {
        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
        /* test for NULL */
        if (buffer == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        /* 
        Using this constructor will prevent buffer from being removed when
        string gets removed
        */
        UnicodeString string;
        source.getText(string);
        u_memcpy(buffer, string.getBuffer(), length);
    }

    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
        uprv_free((UChar *)m_data_->iteratordata_.string);
    }
    m_data_->isWritable = TRUE;
    /* Free offsetBuffer before initializing it. */
    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 
        &m_data_->iteratordata_, &status);
    m_data_->reset_   = TRUE;
}
Beispiel #9
0
U_CAPI int32_t U_EXPORT2
ucnv_getDisplayName(const UConverter *cnv,
                    const char *displayLocale,
                    UChar *displayName, int32_t displayNameCapacity,
                    UErrorCode *pErrorCode) {
    UResourceBundle *rb;
    const UChar *name;
    int32_t length;
    UErrorCode localStatus = U_ZERO_ERROR;

    /* check arguments */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* open the resource bundle and get the display name string */
    rb=ures_open(NULL, displayLocale, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* use the internal name as the key */
    name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus);
    ures_close(rb);

    if(U_SUCCESS(localStatus)) {
        /* copy the string */
        if (*pErrorCode == U_ZERO_ERROR) {
            *pErrorCode = localStatus;
        }
        u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR);
    } else {
        /* convert the internal name into a Unicode string */
        length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name);
        u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity));
    }
    return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode);
}
Beispiel #10
0
/** 
* This is the "real" constructor for this class; it constructs an iterator
* over the source text using the specified collator
*/
CollationElementIterator::CollationElementIterator(
                                               const UnicodeString& sourceText,
                                               const RuleBasedCollator* order,
                                               UErrorCode& status)
                                               : isDataOwned_(TRUE)
{
    if (U_FAILURE(status)) {
        return;
    }

    int32_t length = sourceText.length();
    UChar *string = NULL;

    if (length > 0) {
        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
        /* test for NULL */
        if (string == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        /* 
        Using this constructor will prevent buffer from being removed when
        string gets removed
        */
        u_memcpy(string, sourceText.getBuffer(), length);
    }
    else {
        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
        /* test for NULL */
        if (string == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        *string = 0;
    }
    m_data_ = ucol_openElements(order->ucollator, string, length, &status);

    /* Test for buffer overflows */
    if (U_FAILURE(status)) {
        return;
    }
    m_data_->isWritable = TRUE;
}
void pelet::UCharBufferedFileClass::GrowBuffer(int minCapacity) {
	int newCapacity = minCapacity < (2 * BufferCapacity) ? (2 * BufferCapacity) : minCapacity;
	UChar* newBuffer = new UChar[newCapacity];
	u_memcpy(newBuffer, Buffer, BufferCapacity);
	u_memset(newBuffer + BufferCapacity, 'i', newCapacity / 2);
	
	
	// change all of the pointers
	TokenStart = newBuffer + (TokenStart - Buffer);
	Current = newBuffer + (Current - Buffer);
	
	// leave Limit at the place where the last good character is located
	Limit = newBuffer + (Limit - Buffer);
	Marker = newBuffer + (Marker - Buffer);
	
	delete[] Buffer;
	Buffer = newBuffer;
	BufferCapacity = newCapacity;
}
Beispiel #12
0
U_CFUNC void
ustr_cpy(struct UString *dst,
     const struct UString *src,
     UErrorCode *status)
{
    if(U_FAILURE(*status) || dst == src)
        return;

    if(dst->fCapacity < src->fLength) {
        ustr_resize(dst, ALLOCATION(src->fLength), status);
        if(U_FAILURE(*status))
            return;
    }
    if(src->fChars == NULL || dst->fChars == NULL){
        return;
    }
    u_memcpy(dst->fChars, src->fChars, src->fLength);
    dst->fLength = src->fLength;
    dst->fChars[dst->fLength] = 0x0000;
}
Beispiel #13
0
int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, 
                        UChar* dest, int32_t destCapacity, 
                        UBool allowUnassigned,
                        UParseError* /*parseError*/,
                        UErrorCode& status ){

    if(U_FAILURE(status)){
        return 0;
    }
    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString rsource(src,srcLength);
    // map the code points
    // transliteration also performs NFKC
    mapping->transliterate(rsource);
    
    const UChar* buffer = rsource.getBuffer();
    int32_t bufLen = rsource.length();
    // check if unassigned
    if(allowUnassigned == FALSE){
        int32_t bufIndex=0;
        UChar32 ch =0 ;
        for(;bufIndex<bufLen;){
            U16_NEXT(buffer, bufIndex, bufLen, ch);
            if(unassigned.contains(ch)){
                status = U_IDNA_UNASSIGNED_ERROR;
                return 0;
            }
        }
    }
    // check if there is enough room in the output
    if(bufLen < destCapacity){
        u_memcpy(dest, buffer, bufLen);
    }

    return u_terminateUChars(dest, destCapacity, bufLen, &status);
}
static void
addUnfolding(UChar32 c, const UChar *s, int32_t length) {
    int32_t i;

    if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
        fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
                (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    if(unfoldTop >= (LENGTHOF(unfold) - UGENCASE_UNFOLD_STRING_WIDTH)) {
        fprintf(stderr, "gencase error: too many multi-character case foldings\n");
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
    u_memcpy(unfold+unfoldTop, s, length);

    i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
    U16_APPEND_UNSAFE(unfold, i, c);

    ++unfoldRows;
    unfoldTop+=UGENCASE_UNFOLD_WIDTH;
}
Beispiel #15
0
/**
* Sets the source to the new source string.
*/
void CollationElementIterator::setText(const UnicodeString& source,
                                       UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return;
    }

    int32_t length = source.length();
    UChar *string = NULL;
    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
        uprv_free((UChar *)m_data_->iteratordata_.string);
    }
    m_data_->isWritable = TRUE;
    if (length > 0) {
        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
        /* test for NULL */
        if (string == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        u_memcpy(string, source.getBuffer(), length);
    }
    else {
        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
        /* test for NULL */
        if (string == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        *string = 0;
    }
    /* Free offsetBuffer before initializing it. */
    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 
        &m_data_->iteratordata_, &status);

    m_data_->reset_   = TRUE;
}
Beispiel #16
0
/** 
* This is the "real" constructor for this class; it constructs an iterator over 
* the source text using the specified collator
*/
CollationElementIterator::CollationElementIterator(
                                           const CharacterIterator& sourceText,
                                           const RuleBasedCollator* order,
                                           UErrorCode& status)
                                           : isDataOwned_(TRUE)
{
    if (U_FAILURE(status))
        return;

    // **** should I just drop this test? ****
    /*
    if ( sourceText.endIndex() != 0 )
    {
        // A CollationElementIterator is really a two-layered beast.
        // Internally it uses a Normalizer to munge the source text into a form 
        // where all "composed" Unicode characters (such as \u00FC) are split into a 
        // normal character and a combining accent character.  
        // Afterward, CollationElementIterator does its own processing to handle
        // expanding and contracting collation sequences, ignorables, and so on.
        
        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
                                ? Normalizer::NO_OP : order->getDecomposition();
          
        text = new Normalizer(sourceText, decomp);
        if (text == NULL)
        status = U_MEMORY_ALLOCATION_ERROR;    
    }
    */
    int32_t length = sourceText.getLength();
    UChar *buffer;
    if (length > 0) {
        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
        /* test for NULL */
        if (buffer == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        /* 
        Using this constructor will prevent buffer from being removed when
        string gets removed
        */
        UnicodeString string(buffer, length, length);
        ((CharacterIterator &)sourceText).getText(string);
        const UChar *temp = string.getBuffer();
        u_memcpy(buffer, temp, length);
    }
    else {
        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
        /* test for NULL */
        if (buffer == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        *buffer = 0;
    }
    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);

    /* Test for buffer overflows */
    if (U_FAILURE(status)) {
        return;
    }
    m_data_->isWritable = TRUE;
}
Beispiel #17
0
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char * locale,
                    const char * displayLocale,
                    UChar * dest, int32_t destCapacity,
                    UErrorCode * pErrorCode)
{
	int32_t length, length2, length3 = 0;
	UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
	UEnumeration * keywordEnum = NULL;
	int32_t keywordCount = 0;
	const char * keyword = NULL;
	int32_t keywordLen = 0;
	char keywordValue[256];
	int32_t keywordValueLen = 0;

	int32_t locSepLen = 0;
	int32_t locPatLen = 0;
	int32_t p0Len = 0;
	int32_t defaultPatternLen = 9;
	const UChar * dispLocSeparator;
	const UChar * dispLocPattern;
	static const UChar defaultSeparator[3] = { 0x002c, 0x0020 , 0x0000 }; /* comma + space */
	static const UChar defaultPattern[10] = { 0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000 }; /* {0} ({1}) */
	static const UChar pat0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
	static const UChar pat1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */

	UResourceBundle * bundle = NULL;
	UResourceBundle * locdsppat = NULL;

	UErrorCode status = U_ZERO_ERROR;

	/* argument checking */
	if (pErrorCode == NULL || U_FAILURE(*pErrorCode))
	{
		return 0;
	}

	if (destCapacity < 0 || (destCapacity > 0 && dest == NULL))
	{
		*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
		return 0;
	}

	bundle    = ures_open(U_ICUDATA_LANG, displayLocale, &status);

	locdsppat = ures_getByKeyWithFallback(bundle, _kLocaleDisplayPattern, NULL, &status);
	dispLocSeparator = ures_getStringByKeyWithFallback(locdsppat, _kSeparator, &locSepLen, &status);
	dispLocPattern = ures_getStringByKeyWithFallback(locdsppat, _kPattern, &locPatLen, &status);

	/*close the bundles */
	ures_close(locdsppat);
	ures_close(bundle);

	/* If we couldn't find any data, then use the defaults */
	if (locSepLen == 0)
	{
		dispLocSeparator = defaultSeparator;
		locSepLen = 2;
	}

	if (locPatLen == 0)
	{
		dispLocPattern = defaultPattern;
		locPatLen = 9;
	}

	/*
	 * if there is a language, then write "language (country, variant)"
	 * otherwise write "country, variant"
	 */

	/* write the language */
	length = uloc_getDisplayLanguage(locale, displayLocale,
	                                 dest, destCapacity,
	                                 pErrorCode);
	hasLanguage = length > 0;

	if (hasLanguage)
	{
		p0Len = length;

		/* append " (" */
		if (length < destCapacity)
		{
			dest[length] = 0x20;
		}
		++length;
		if (length < destCapacity)
		{
			dest[length] = 0x28;
		}
		++length;
	}

	if (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)
	{
		/* keep preflighting */
		*pErrorCode = U_ZERO_ERROR;
	}

	/* append the script */
	if (length < destCapacity)
	{
		length2 = uloc_getDisplayScript(locale, displayLocale,
		                                dest + length, destCapacity - length,
		                                pErrorCode);
	}
	else
	{
		length2 = uloc_getDisplayScript(locale, displayLocale,
		                                NULL, 0,
		                                pErrorCode);
	}
	hasScript = length2 > 0;
	length += length2;

	if (hasScript)
	{
		/* append separator */
		if (length + locSepLen <= destCapacity)
		{
			u_memcpy(dest + length, dispLocSeparator, locSepLen);
		}
		length += locSepLen;
	}

	if (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)
	{
		/* keep preflighting */
		*pErrorCode = U_ZERO_ERROR;
	}

	/* append the country */
	if (length < destCapacity)
	{
		length2 = uloc_getDisplayCountry(locale, displayLocale,
		                                 dest + length, destCapacity - length,
		                                 pErrorCode);
	}
	else
	{
		length2 = uloc_getDisplayCountry(locale, displayLocale,
		                                 NULL, 0,
		                                 pErrorCode);
	}
	hasCountry = length2 > 0;
	length += length2;

	if (hasCountry)
	{
		/* append separator */
		if (length + locSepLen <= destCapacity)
		{
			u_memcpy(dest + length, dispLocSeparator, locSepLen);
		}
		length += locSepLen;
	}

	if (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)
	{
		/* keep preflighting */
		*pErrorCode = U_ZERO_ERROR;
	}

	/* append the variant */
	if (length < destCapacity)
	{
		length2 = uloc_getDisplayVariant(locale, displayLocale,
		                                 dest + length, destCapacity - length,
		                                 pErrorCode);
	}
	else
	{
		length2 = uloc_getDisplayVariant(locale, displayLocale,
		                                 NULL, 0,
		                                 pErrorCode);
	}
	hasVariant = length2 > 0;
	length += length2;

	if (hasVariant)
	{
		/* append separator */
		if (length + locSepLen <= destCapacity)
		{
			u_memcpy(dest + length, dispLocSeparator, locSepLen);
		}
		length += locSepLen;
	}

	keywordEnum = uloc_openKeywords(locale, pErrorCode);

	for (keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--)
	{
		if (U_FAILURE(*pErrorCode))
		{
			break;
		}
		/* the uenum_next returns NUL terminated string */
		keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
		if (length + length3 < destCapacity)
		{
			length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest + length + length3, destCapacity - length - length3,
			                                  pErrorCode);
		}
		else
		{
			length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
		}
		if (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)
		{
			/* keep preflighting */
			*pErrorCode = U_ZERO_ERROR;
		}
		keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
		if (keywordValueLen)
		{
			if (length + length3 < destCapacity)
			{
				dest[length + length3] = 0x3D;
			}
			length3++;
			if (length + length3 < destCapacity)
			{
				length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest + length + length3,
				                                       destCapacity - length - length3, pErrorCode);
			}
			else
			{
				length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
			}
			if (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)
			{
				/* keep preflighting */
				*pErrorCode = U_ZERO_ERROR;
			}
		}
		if (keywordCount > 1)
		{
			if (length + length3 + locSepLen <= destCapacity && keywordCount)
			{
				u_memcpy(dest + length + length3, dispLocSeparator, locSepLen);
				length3 += locSepLen;
			}
		}
	}
	uenum_close(keywordEnum);

	hasKeywords = length3 > 0;
	length += length3;


	if ((hasScript && !hasCountry)
	    || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
	    || ((hasScript || hasCountry || hasVariant) && !hasKeywords))
	{
		/* Remove separator  */
		length -= locSepLen;
	}
	else if (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords)
	{
		/* Remove " (" */
		length -= 2;
	}

	if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords))
	{
		/* append ")" */
		if (length < destCapacity)
		{
			dest[length] = 0x29;
		}
		++length;

		/* If the localized display pattern is something other than the default pattern of "{0} ({1})", then
		 * then we need to do the formatting here.  It would be easier to use a messageFormat to do this, but we
		 * can't since we don't have the APIs in the i18n library available to us at this point.
		 */
		if (locPatLen != defaultPatternLen ||
		    u_strcmp(dispLocPattern, defaultPattern))  /* Something other than the default pattern */
		{
			UChar * p0 = u_strstr(dispLocPattern, pat0);
			UChar * p1 = u_strstr(dispLocPattern, pat1);
			u_terminateUChars(dest, destCapacity, length, pErrorCode);

			if (p0 != NULL && p1 != NULL)     /* The pattern is well formed */
			{
				if (dest)
				{
					int32_t destLen = 0;
					UChar * result = (UChar *)uprv_malloc((length + 1) * sizeof(UChar));
					UChar * upos = (UChar *)dispLocPattern;
					u_strcpy(result, dest);
					dest[0] = 0;
					while (*upos)
					{
						if (upos == p0)     /* Handle {0} substitution */
						{
							u_strncat(dest, result, p0Len);
							destLen += p0Len;
							dest[destLen] = 0; /* Null terminate */
							upos += 3;
						}
						else if (upos == p1)       /* Handle {1} substitution */
						{
							UChar * p1Start = &result[p0Len + 2];
							u_strncat(dest, p1Start, length - p0Len - 3);
							destLen += (length - p0Len - 3);
							dest[destLen] = 0; /* Null terminate */
							upos += 3;
						}
						else     /* Something from the pattern not {0} or {1} */
						{
							u_strncat(dest, upos, 1);
							upos++;
							destLen++;
							dest[destLen] = 0; /* Null terminate */
						}
					}
					length = destLen;
					uprv_free(result);
				}
			}
		}
	}
	if (*pErrorCode == U_BUFFER_OVERFLOW_ERROR)
	{
		/* keep preflighting */
		*pErrorCode = U_ZERO_ERROR;
	}

	return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
void MultithreadTest::TestCollators()
{

    UErrorCode status = U_ZERO_ERROR;
    FILE *testFile = NULL;
    char testDataPath[1024];
    strcpy(testDataPath, IntlTest::getSourceTestData(status));
    if (U_FAILURE(status)) {
        errln("ERROR: could not open test data %s", u_errorName(status));
        return;
    }
    strcat(testDataPath, "CollationTest_");

    const char* type = "NON_IGNORABLE";

    const char *ext = ".txt";
    if(testFile) {
        fclose(testFile);
    }
    char buffer[1024];
    strcpy(buffer, testDataPath);
    strcat(buffer, type);
    size_t bufLen = strlen(buffer);

    // we try to open 3 files:
    // path/CollationTest_type.txt
    // path/CollationTest_type_SHORT.txt
    // path/CollationTest_type_STUB.txt
    // we are going to test with the first one that we manage to open.

    strcpy(buffer+bufLen, ext);

    testFile = fopen(buffer, "rb");

    if(testFile == 0) {
        strcpy(buffer+bufLen, "_SHORT");
        strcat(buffer, ext);
        testFile = fopen(buffer, "rb");

        if(testFile == 0) {
            strcpy(buffer+bufLen, "_STUB");
            strcat(buffer, ext);
            testFile = fopen(buffer, "rb");

            if (testFile == 0) {
                *(buffer+bufLen) = 0;
                dataerrln("could not open any of the conformance test files, tried opening base %s", buffer);
                return;        
            } else {
                infoln(
                    "INFO: Working with the stub file.\n"
                    "If you need the full conformance test, please\n"
                    "download the appropriate data files from:\n"
                    "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
            }
        }
    }

    Line *lines = new Line[200000];
    memset(lines, 0, sizeof(Line)*200000);
    int32_t lineNum = 0;

    UChar bufferU[1024];
    int32_t buflen = 0;
    uint32_t first = 0;
    uint32_t offset = 0;

    while (fgets(buffer, 1024, testFile) != NULL) {
        offset = 0;
        if(*buffer == 0 || strlen(buffer) < 3 || buffer[0] == '#') {
            continue;
        }
        offset = u_parseString(buffer, bufferU, 1024, &first, &status);
        buflen = offset;
        bufferU[offset++] = 0;
        lines[lineNum].buflen = buflen;
        //lines[lineNum].buff = new UChar[buflen+1];
        u_memcpy(lines[lineNum].buff, bufferU, buflen);
        lineNum++;
    }
    fclose(testFile);
    if(U_FAILURE(status)) {
      dataerrln("Couldn't read the test file!");
      return;
    }

    UCollator *coll = ucol_open("root", &status);
    if(U_FAILURE(status)) {
        errcheckln(status, "Couldn't open UCA collator");
        return;
    }
    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
    ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);

    int32_t noSpawned = 0;
    int32_t spawnResult = 0;
    LocalArray<CollatorThreadTest> tests(new CollatorThreadTest[kCollatorThreadThreads]);

    logln(UnicodeString("Spawning: ") + kCollatorThreadThreads + " threads * " + kFormatThreadIterations + " iterations each.");
    int32_t j = 0;
    for(j = 0; j < kCollatorThreadThreads; j++) {
        //logln("Setting collator %i", j);
        tests[j].setCollator(coll, lines, lineNum);
    }
    for(j = 0; j < kCollatorThreadThreads; j++) {
        log("%i ", j);
        spawnResult = tests[j].start();
        if(spawnResult != 0) {
            infoln("THREAD INFO: Couldn't spawn more than %i threads", noSpawned);
            break;
        }
        noSpawned++;
    }
    logln("Spawned all");
    if (noSpawned == 0) {
        errln("No threads could be spawned.");
        return;
    }

    for(int32_t patience = kCollatorThreadPatience;patience > 0; patience --)
    {
        logln("Waiting...");

        int32_t i;
        int32_t terrs = 0;
        int32_t completed =0;

        for(i=0;i<kCollatorThreadThreads;i++)
        {
            if (tests[i].isRunning() == FALSE)
            {
                completed++;

                //logln(UnicodeString("Test #") + i + " is complete.. ");

                UnicodeString theErr;
                if(tests[i].getError(theErr))
                {
                    terrs++;
                    errln(UnicodeString("#") + i + ": " + theErr);
                }
                // print out the error, too, if any.
            }
        }
        logln("Completed %i tests", completed);

        if(completed == noSpawned)
        {
            logln("Done! All %i tests are finished", noSpawned);

            if(terrs)
            {
                errln("There were errors.");
                SimpleThread::errorFunc();
            }
            ucol_close(coll);
            //for(i = 0; i < lineNum; i++) {
            //delete[] lines[i].buff;
            //}
            delete[] lines;

            return;
        }

        SimpleThread::sleep(900);
    }
    errln("patience exceeded. ");
    SimpleThread::errorFunc();
    ucol_close(coll);
}
Beispiel #19
0
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeywordValue(   const char* locale,
                               const char* keyword,
                               const char* displayLocale,
                               UChar* dest,
                               int32_t destCapacity,
                               UErrorCode* status){


    char keywordValue[ULOC_FULLNAME_CAPACITY*4];
    int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
    int32_t keywordValueLen =0;

    /* argument checking */
    if(status==NULL || U_FAILURE(*status)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the keyword value */
    keywordValue[0]=0;
    keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);

    /*
     * if the keyword is equal to currency .. then to get the display name
     * we need to do the fallback ourselves
     */
    if(uprv_stricmp(keyword, _kCurrency)==0){

        int32_t dispNameLen = 0;
        const UChar *dispName = NULL;

        UResourceBundle *bundle     = ures_open(U_ICUDATA_CURR, displayLocale, status);
        UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
        UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);

        dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);

        /*close the bundles */
        ures_close(currency);
        ures_close(currencies);
        ures_close(bundle);

        if(U_FAILURE(*status)){
            if(*status == U_MISSING_RESOURCE_ERROR){
                /* we just want to write the value over if nothing is available */
                *status = U_USING_DEFAULT_WARNING;
            }else{
                return 0;
            }
        }

        /* now copy the dispName over if not NULL */
        if(dispName != NULL){
            if(dispNameLen <= destCapacity){
                u_memcpy(dest, dispName, dispNameLen);
                return u_terminateUChars(dest, destCapacity, dispNameLen, status);
            }else{
                *status = U_BUFFER_OVERFLOW_ERROR;
                return dispNameLen;
            }
        }else{
            /* we have not found the display name for the value .. just copy over */
            if(keywordValueLen <= destCapacity){
                u_charsToUChars(keywordValue, dest, keywordValueLen);
                return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
            }else{
                 *status = U_BUFFER_OVERFLOW_ERROR;
                return keywordValueLen;
            }
        }


    }else{

        return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
                                   _kTypes, keyword,
                                   keywordValue,
                                   keywordValue,
                                   dest, destCapacity,
                                   status);
    }
}
Beispiel #20
0
U_CAPI int32_t U_EXPORT2
uspoof_getSkeleton(const USpoofChecker *sc,
                   uint32_t type,
                   const UChar *s,  int32_t length,
                   UChar *dest, int32_t destCapacity,
                   UErrorCode *status) {

    // TODO:  this function could be sped up a bit
    //        Skip the input normalization when not needed, work from callers data.
    //        Put the initial skeleton straight into the caller's destination buffer.
    //        It probably won't need normalization.
    //        But these would make the structure more complicated.  

    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL) ||
        (type & ~(USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE)) != 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

   int32_t tableMask = 0;
   switch (type) {
      case 0:
        tableMask = USPOOF_ML_TABLE_FLAG;
        break;
      case USPOOF_SINGLE_SCRIPT_CONFUSABLE:
        tableMask = USPOOF_SL_TABLE_FLAG;
        break;
      case USPOOF_ANY_CASE:
        tableMask = USPOOF_MA_TABLE_FLAG;
        break;
      case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE:
        tableMask = USPOOF_SA_TABLE_FLAG;
        break;
      default:
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // NFD transform of the user supplied input
    
    UChar nfdStackBuf[USPOOF_STACK_BUFFER_SIZE];
    UChar *nfdInput = nfdStackBuf;
    int32_t normalizedLen = unorm_normalize(
        s, length, UNORM_NFD, 0, nfdInput, USPOOF_STACK_BUFFER_SIZE, status);
    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        nfdInput = (UChar *)uprv_malloc((normalizedLen+1)*sizeof(UChar));
        if (nfdInput == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        normalizedLen = unorm_normalize(s, length, UNORM_NFD, 0,
                                        nfdInput, normalizedLen+1, status);
    }
    if (U_FAILURE(*status)) {
        if (nfdInput != nfdStackBuf) {
            uprv_free(nfdInput);
        }
        return 0;
    }

    // buffer to hold the Unicode defined skeleton mappings for a single code point
    UChar buf[USPOOF_MAX_SKELETON_EXPANSION];

    // Apply the skeleton mapping to the NFD normalized input string
    // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
    int32_t inputIndex = 0;
    UnicodeString skelStr;
    while (inputIndex < normalizedLen) {
        UChar32 c;
        U16_NEXT(nfdInput, inputIndex, normalizedLen, c);
        int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
        skelStr.append(buf, replaceLen);
    }

    if (nfdInput != nfdStackBuf) {
        uprv_free(nfdInput);
    }
    
    const UChar *result = skelStr.getBuffer();
    int32_t  resultLen  = skelStr.length();
    UChar   *normedResult = NULL;

    // Check the skeleton for NFD, normalize it if needed.
    // Unnormalized results should be very rare.
    if (!unorm_isNormalized(result, resultLen, UNORM_NFD, status)) {
        normalizedLen = unorm_normalize(result, resultLen, UNORM_NFD, 0, NULL, 0, status);
        normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar)));
        if (normedResult == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
        *status = U_ZERO_ERROR;
        unorm_normalize(result, resultLen, UNORM_NFD, 0, normedResult, normalizedLen+1, status);
        result = normedResult;
        resultLen = normalizedLen;
    }

    // Copy the skeleton to the caller's buffer
    if (U_SUCCESS(*status)) {
        if (destCapacity == 0 || resultLen > destCapacity) {
            *status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING;
        } else {
            u_memcpy(dest, result, resultLen);
            if (destCapacity > resultLen) {
                dest[resultLen] = 0;
            } else {
                *status = U_STRING_NOT_TERMINATED_WARNING;
            }
        }
     }       
     uprv_free(normedResult);
     return resultLen;
}
Beispiel #21
0
void match_verify( match *m, UChar *text, int tlen )
{
    // sanity check match
    match_verify_end( m );
    UChar *copy = calloc( m->len, sizeof(UChar) );
    if ( copy != NULL )
    {
        card *c = m->start.current;
        int i=0;
        int count = 0;
        while ( c != NULL && i <= m->len )
        {
            pair *p = card_pair(c);
            UChar *pdata = pair_data(p);
            if ( m->start.current == m->end.current )
            {
                count = (m->prev.pos-m->start.pos)+1;
                if ( i+count<=m->len )
                    u_memcpy(&copy[i], &pdata[m->start.pos], count);
                i += count;
                break;
            }
            else if ( m->start.current == c )
            {
                count = pair_len(p)-m->start.pos;
                if ( i+count<=m->len )
                    u_memcpy(&copy[i], &pdata[m->start.pos], count);
                i += count;
            }
            else if ( c == m->prev.current )
            {
                count = m->prev.pos+1;
                if ( i+count<=m->len )
                    u_memcpy(&copy[i], pdata, count);
                i += count;
                break;
            }
            else
            {
                count = pair_len(p);
                if ( count>0 && i+count<=m->len )
                    u_memcpy(&copy[i], pdata, count);
                i += count;
            }
            c = card_next( c, m->bs, 0 );
        }
        int j,mend=i;
        int tend = m->text_off+m->len;
        if ( m->len != i )
            printf("match: source %d and dest %d different lengths\n",i,m->len);
        for ( i=0,j=m->text_off;i<mend&&j<tend;i++,j++ )
        {
            if ( text[j] != copy[i] )
            {
                printf("match: mismatch!\n");
                break;
            }
        }
        free( copy );
    }
    if ( m->next != NULL )
        match_verify( m->next, text, tlen );
}
Beispiel #22
0
/* internal function */
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             uint32_t options,
             UErrorCode *pErrorCode) {
    const UCaseProps *csp;

    /* current-level start/limit - s1/s2 as current */
    const UChar *start1, *start2, *limit1, *limit2;

    /* case folding variables */
    const UChar *p;
    int32_t length;

    /* stacks of previous-level start/current/limit */
    CmpEquivLevel stack1[2], stack2[2];

    /* case folding buffers, only use current-level start/limit */
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

    /* track which is the current level per string */
    int32_t level1, level2;

    /* current code units, and code points for lookups */
    UChar32 c1, c2, cp1, cp2;

    /* no argument error checking because this itself is not an API */

    /*
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
     * otherwise this function would have to behave exactly as uprv_strCompare()
     */
    csp=ucase_getSingleton();
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* initialize */
    start1=s1;
    if(length1==-1) {
        limit1=NULL;
    } else {
        limit1=s1+length1;
    }

    start2=s2;
    if(length2==-1) {
        limit2=NULL;
    } else {
        limit2=s2+length2;
    }

    level1=level2=0;
    c1=c2=-1;

    /* comparison loop */
    for(;;) {
        /*
         * here a code unit value of -1 means "get another code unit"
         * below it will mean "this source is finished"
         */

        if(c1<0) {
            /* get next code unit from string 1, post-increment */
            for(;;) {
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level1==0) {
                        c1=-1;
                        break;
                    }
                } else {
                    ++s1;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level1;
                    start1=stack1[level1].start;
                } while(start1==NULL);
                s1=stack1[level1].s;
                limit1=stack1[level1].limit;
            }
        }

        if(c2<0) {
            /* get next code unit from string 2, post-increment */
            for(;;) {
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level2==0) {
                        c2=-1;
                        break;
                    }
                } else {
                    ++s2;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level2;
                    start2=stack2[level2].start;
                } while(start2==NULL);
                s2=stack2[level2].s;
                limit2=stack2[level2].limit;
            }
        }

        /*
         * compare c1 and c2
         * either variable c1, c2 is -1 only if the corresponding string is finished
         */
        if(c1==c2) {
            if(c1<0) {
                return 0;   /* c1==c2==-1 indicating end of strings */
            }
            c1=c2=-1;       /* make us fetch new code units */
            continue;
        } else if(c1<0) {
            return -1;      /* string 1 ends before string 2 */
        } else if(c2<0) {
            return 1;       /* string 2 ends before string 1 */
        }
        /* c1!=c2 && c1>=0 && c2>=0 */

        /* get complete code points for c1, c2 for lookups if either is a surrogate */
        cp1=c1;
        if(U_IS_SURROGATE(c1)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c1)) {
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
                }
            } else /* isTrail(c1) */ {
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
                }
            }
        }

        cp2=c2;
        if(U_IS_SURROGATE(c2)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c2)) {
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
                }
            } else /* isTrail(c2) */ {
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
                }
            }
        }

        /*
         * go down one level for each string
         * continue with the main loop as soon as there is a real change
         */

        if( level1==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
        ) {
            /* cp1 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c1)) {
                if(U_IS_SURROGATE_LEAD(c1)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s1;
                } else /* isTrail(c1) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s2;
                    c2=*(s2-1);
                }
            }

            /* push current level pointers */
            stack1[0].start=start1;
            stack1[0].s=s1;
            stack1[0].limit=limit1;
            ++level1;

            /* copy the folding result to fold1[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold1, p, length);
            } else {
                int32_t i=0;
                U16_APPEND_UNSAFE(fold1, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start1=s1=fold1;
            limit1=fold1+length;

            /* get ready to read from decomposition, continue with loop */
            c1=-1;
            continue;
        }

        if( level2==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
        ) {
            /* cp2 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c2)) {
                if(U_IS_SURROGATE_LEAD(c2)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s2;
                } else /* isTrail(c2) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s1;
                    c1=*(s1-1);
                }
            }

            /* push current level pointers */
            stack2[0].start=start2;
            stack2[0].s=s2;
            stack2[0].limit=limit2;
            ++level2;

            /* copy the folding result to fold2[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold2, p, length);
            } else {
                int32_t i=0;
                U16_APPEND_UNSAFE(fold2, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start2=s2=fold2;
            limit2=fold2+length;

            /* get ready to read from decomposition, continue with loop */
            c2=-1;
            continue;
        }

        /*
         * no decomposition/case folding, max level for both sides:
         * return difference result
         *
         * code point order comparison must not just return cp1-cp2
         * because when single surrogates are present then the surrogate pairs
         * that formed cp1 and cp2 may be from different string indexes
         *
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
         *
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
         * so we have slightly different pointer/start/limit comparisons here
         */

        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
            if(
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c1-=0x2800;
            }

            if(
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c2-=0x2800;
            }
        }

        return c1-c2;
    }
}
Beispiel #23
0
void MultithreadTest::TestCollators()
{

    UErrorCode status = U_ZERO_ERROR;
    FILE *testFile = NULL;
    char testDataPath[1024];
    strcpy(testDataPath, IntlTest::getSourceTestData(status));
    if (U_FAILURE(status)) {
        errln("ERROR: could not open test data %s", u_errorName(status));
        return;
    }
    strcat(testDataPath, "CollationTest_");

    const char* type = "NON_IGNORABLE";

    const char *ext = ".txt";
    if(testFile) {
        fclose(testFile);
    }
    char buffer[1024];
    strcpy(buffer, testDataPath);
    strcat(buffer, type);
    size_t bufLen = strlen(buffer);

    // we try to open 3 files:
    // path/CollationTest_type.txt
    // path/CollationTest_type_SHORT.txt
    // path/CollationTest_type_STUB.txt
    // we are going to test with the first one that we manage to open.

    strcpy(buffer+bufLen, ext);

    testFile = fopen(buffer, "rb");

    if(testFile == 0) {
        strcpy(buffer+bufLen, "_SHORT");
        strcat(buffer, ext);
        testFile = fopen(buffer, "rb");

        if(testFile == 0) {
            strcpy(buffer+bufLen, "_STUB");
            strcat(buffer, ext);
            testFile = fopen(buffer, "rb");

            if (testFile == 0) {
                *(buffer+bufLen) = 0;
                dataerrln("could not open any of the conformance test files, tried opening base %s", buffer);
                return;
            } else {
                infoln(
                    "INFO: Working with the stub file.\n"
                    "If you need the full conformance test, please\n"
                    "download the appropriate data files from:\n"
                    "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
            }
        }
    }

    LocalArray<Line> lines(new Line[200000]);
    memset(lines.getAlias(), 0, sizeof(Line)*200000);
    int32_t lineNum = 0;

    UChar bufferU[1024];
    uint32_t first = 0;

    while (fgets(buffer, 1024, testFile) != NULL) {
        if(*buffer == 0 || buffer[0] == '#') {
            // Store empty and comment lines so that errors are reported
            // for the real test file lines.
            lines[lineNum].buflen = 0;
            lines[lineNum].buff[0] = 0;
        } else {
            int32_t buflen = u_parseString(buffer, bufferU, 1024, &first, &status);
            lines[lineNum].buflen = buflen;
            u_memcpy(lines[lineNum].buff, bufferU, buflen);
            lines[lineNum].buff[buflen] = 0;
        }
        lineNum++;
    }
    fclose(testFile);
    if(U_FAILURE(status)) {
      dataerrln("Couldn't read the test file!");
      return;
    }

    UVersionInfo uniVersion;
    static const UVersionInfo v62 = { 6, 2, 0, 0 };
    u_getUnicodeVersion(uniVersion);
    UBool isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0;

    LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), status));
    if(U_FAILURE(status)) {
        errcheckln(status, "Couldn't open UCA collator");
        return;
    }
    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
    coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
    coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status);
    coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status);

    int32_t spawnResult = 0;
    LocalArray<CollatorThreadTest> tests(new CollatorThreadTest[kCollatorThreadThreads]);

    logln(UnicodeString("Spawning: ") + kCollatorThreadThreads + " threads * " + kFormatThreadIterations + " iterations each.");
    int32_t j = 0;
    for(j = 0; j < kCollatorThreadThreads; j++) {
        //logln("Setting collator %i", j);
        tests[j].setCollator(coll.getAlias(), lines.getAlias(), lineNum, isAtLeastUCA62);
    }
    for(j = 0; j < kCollatorThreadThreads; j++) {
        log("%i ", j);
        spawnResult = tests[j].start();
        if(spawnResult != 0) {
            errln("%s:%d THREAD INFO: thread %d failed to start with status %d", __FILE__, __LINE__, j, spawnResult);
            return;
        }
    }
    logln("Spawned all");

    for(int32_t i=0;i<kCollatorThreadThreads;i++) {
        tests[i].join();
        //logln(UnicodeString("Test #") + i + " is complete.. ");
    }
}