Example #1
0
/*
 * Test one string with the ICU and the reference BOCU-1 implementations.
 *
 * The text is encoded with the reference writeString() and with
 * ucnv_fromUChars(); both byte sequences must be identical. Each byte
 * sequence is then decoded again (readString() / ucnv_toUChars()) and the
 * results must match the original text and each other.
 * Every mismatch or failure is reported through log_err().
 *
 * bocu1:  converter passed to ucnv_fromUChars()/ucnv_toUChars()
 * number: index of the test string, only used in error messages
 * text:   UTF-16 input
 * length: number of UChars in text
 *
 * NOTE(review): writeString()/readString() take no capacity argument, so the
 * input is presumably expected to fit into DEFAULT_BUFFER_SIZE - confirm
 * against the callers.
 */
static void
roundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) {
    UChar *roundtripRef, *roundtripICU;
    char *bocu1Ref, *bocu1ICU;

    int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength;
    UErrorCode errorCode;

    roundtripRef = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
    roundtripICU = malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
    bocu1Ref = malloc(DEFAULT_BUFFER_SIZE);
    bocu1ICU = malloc(DEFAULT_BUFFER_SIZE);

    /* Do not touch the buffers if any allocation failed; free(NULL) is harmless. */
    if(roundtripRef==NULL || roundtripICU==NULL || bocu1Ref==NULL || bocu1ICU==NULL) {
        log_err("roundtripBOCU1(text(%d)[%d]): out of memory allocating work buffers\n", number, length);
        goto cleanup;
    }

    /* Unicode -> BOCU-1 */
    bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref);

    errorCode=U_ZERO_ERROR;
    bocu1ICULength=ucnv_fromUChars(bocu1, bocu1ICU, DEFAULT_BUFFER_SIZE, text, length, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
        goto cleanup; 
    }

    /* the lengths are compared first so that memcmp never reads past the shorter result */
    if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) {
        log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength);
        goto cleanup;
    }

    /* BOCU-1 -> Unicode */
    roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef);
    if(roundtripRefLength<0) {
        goto cleanup; /* readString() found an error and reported it */
    }

    /* errorCode is still U_ZERO_ERROR here: any earlier failure jumped to cleanup */
    roundtripICULength=ucnv_toUChars(bocu1, roundtripICU, DEFAULT_BUFFER_SIZE, bocu1ICU, bocu1ICULength, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
        goto cleanup;
    }

    if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) {
        log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength);
        goto cleanup;
    }
    if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) {
        log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength);
        goto cleanup;
    }
cleanup:
    free(roundtripRef);
    free(roundtripICU);
    free(bocu1Ref);
    free(bocu1ICU);
}
/*
 * Finalizes the "unfold" table held in the member string `unfold`.
 *
 * The table is treated as rows of UGENCASE_UNFOLD_WIDTH UChars. Row 0 is
 * skipped by the sort and the merge, and receives the final row count at
 * index UCASE_UNFOLD_ROWS. In every other row the first
 * UGENCASE_UNFOLD_STRING_WIDTH UChars are compared as the row's string and
 * the following UGENCASE_UNFOLD_CP_WIDTH UChars hold code points
 * (0 = unused slot).
 *
 * Steps: sort the data rows, merge adjacent rows that have the same string
 * by concatenating their code point columns, store the resulting row count,
 * optionally print the rows, and shrink the string to the surviving rows.
 *
 * errorCode: in/out ICU error code. Nothing is done if it already indicates
 * a failure; set to U_BUFFER_OVERFLOW_ERROR if a merged row would need more
 * than UGENCASE_UNFOLD_CP_WIDTH code points.
 */
void
CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UChar *p, *q;
    int32_t i, j, k;

    /* sort the data */
    int32_t unfoldLength=unfold.length();
    /* number of data rows, not counting row 0 */
    int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
    UChar *unfoldBuffer=unfold.getBuffer(-1);
    /* the element size is given in bytes: UGENCASE_UNFOLD_WIDTH UChars of 2 bytes each */
    /* NOTE(review): errorCode is not checked after the sort; a failure is only seen by the caller */
    uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;

    /* i is not advanced after a merge so that the merged row is compared with its new successor */
    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            /* find the first unused (zero) code point slot of the current row; slot 0 is taken as used */
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            /* append the next row's code points; j is not bounded here, the overflow check follows */
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                errorCode=U_BUFFER_OVERFLOW_ERROR;
                /* NOTE(review): returns without unfold.releaseBuffer() - confirm that this is acceptable */
                return;
            }

            /* move following rows up one */
            --unfoldRows;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    /* store the number of remaining data rows in row 0 */
    unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=unfoldBuffer;
        for(i=0; i<unfoldRows; ++i) {
            /* advance first: row 0 is not printed */
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }

    /* keep row 0 plus the merged data rows */
    unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
}
Example #3
0
/*
 * Equality test for linear-match nodes: a node equals itself; otherwise the
 * base-class comparison must succeed and the `length` UChars at `s` must be
 * the same in both nodes.
 */
UBool
UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
    if(this!=&other) {
        /* the base class decides first; only then is the downcast below used */
        if(!LinearMatchNode::operator==(other)) {
            return FALSE;
        }
        const UCTLinearMatchNode &rhs=(const UCTLinearMatchNode &)other;
        return u_memcmp(s, rhs.s, length)==0;
    }
    return TRUE;
}
/*
 * Opens and closes pattern generators, and verifies that a clone carries over
 * a customized decimal string.
 *
 * All generators that were opened are closed on every path, including the
 * failure paths.
 */
static void TestOpenClose() {
    UErrorCode errorCode=U_ZERO_ERROR;
    UDateTimePatternGenerator *dtpg, *dtpg2;
    const UChar *s;
    int32_t length;

    /* Open a DateTimePatternGenerator for the default locale. */
    dtpg=udatpg_open(NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "udatpg_open(NULL) failed - %s\n", u_errorName(errorCode));
        return;
    }
    udatpg_close(dtpg);

    /* Now one for German. */
    dtpg=udatpg_open("de", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_open(de) failed - %s\n", u_errorName(errorCode));
        return;
    }

    /* Make some modification which we verify gets passed on to the clone. */
    udatpg_setDecimal(dtpg, pipeString, 1);

    /* Clone the generator. */
    dtpg2=udatpg_clone(dtpg, &errorCode);
    if(U_FAILURE(errorCode) || dtpg2==NULL) {
        log_err("udatpg_clone() failed - %s\n", u_errorName(errorCode));
        /* release the original, and the clone if one was returned despite the error */
        if(dtpg2!=NULL) {
            udatpg_close(dtpg2);
        }
        udatpg_close(dtpg);
        return;
    }

    /*
     * Verify that the clone has the custom decimal symbol.
     * The returned pointer must not be the caller's own pipeString buffer,
     * and the string must be 1 UChar long, equal to pipeString and
     * NUL-terminated.
     */
    s=udatpg_getDecimal(dtpg2, &length);
    if(s==pipeString || length!=1 || 0!=u_memcmp(s, pipeString, length) || s[length]!=0) { 
        log_err("udatpg_getDecimal(cloned object) did not return the expected string\n");
        /* fall through: both generators still have to be closed */
    }

    udatpg_close(dtpg);
    udatpg_close(dtpg2);
}
Example #5
0
/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries
 *
 * Scans [haystack, end) for the first occurrence of the needle_len UChars at
 * needle for which the break iterator reports a boundary right after the match.
 * Returns the value of grapheme_count_graphemes() for the text before the
 * match, or -1 if there is no such match.
 */
inline int32_t
grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
{
	UChar *cursor = haystack;
	UChar last_unit = needle[needle_len-1];
	UErrorCode status;

	/* last position at which a complete needle still fits */
	end -= needle_len;

	while (cursor <= end) {

		/* jump to the next occurrence of the needle's first code unit */
		cursor = u_memchr(cursor, *needle, (end-cursor+1));
		if (cursor == NULL) {
			return -1;
		}

		/* the first unit matched; test the last one, then everything before the last one */
		if (last_unit == cursor[needle_len-1] && 0 == u_memcmp(needle, cursor, needle_len - 1)) {

			/* does the grapheme end here? */
			status = U_ZERO_ERROR;
			ubrk_setText (bi, haystack, (end - haystack) + needle_len, &status);

			if ( ubrk_isBoundary (bi, (cursor - haystack) + needle_len) ) {
				/* found it, get grapheme count offset */
				return grapheme_count_graphemes(bi, haystack, (cursor - haystack));
			}
		}

		cursor++;
	}

	return -1;
}
static void TestBuilder() {
    UErrorCode errorCode=U_ZERO_ERROR;
    UDateTimePatternGenerator *dtpg;
    UDateTimePatternConflict conflict;
    UEnumeration *en;
    UChar result[20];
    int32_t length, pLength;  
    const UChar *s, *p;
    const UChar* ptrResult[2]; 
    int32_t count=0;
    UDateTimePatternGenerator *generator;
    int32_t formattedCapacity, resultLen,patternCapacity ;
    UChar   pattern[40], formatted[40];
    UDateFormat *formatter;
    UDate sampleDate = 837039928046.0;
    static const char locale[]= "fr";
    UErrorCode status=U_ZERO_ERROR;
    
    /* test create an empty DateTimePatternGenerator */
    dtpg=udatpg_openEmpty(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_openEmpty() failed - %s\n", u_errorName(errorCode));
        return;
    }
    
    /* Add a pattern */
    conflict = udatpg_addPattern(dtpg, redundantPattern, 5, FALSE, result, 20, 
                                 &length, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_addPattern() failed - %s\n", u_errorName(errorCode));
        return;
    }
    /* Add a redundant pattern */
    conflict = udatpg_addPattern(dtpg, redundantPattern, 5, FALSE, result, 20,
                                 &length, &errorCode);
    if(conflict == UDATPG_NO_CONFLICT) {
        log_err("udatpg_addPattern() failed to find the duplicate pattern.\n");
        return;
    }
    /* Test pattern == NULL */
    s=NULL;
    length = udatpg_addPattern(dtpg, s, 0, FALSE, result, 20,
                               &length, &errorCode);
    if(!U_FAILURE(errorCode)&&(length!=0) ) {
        log_err("udatpg_addPattern failed in illegal argument - pattern is NULL.\n");
        return;
    }

    /* replace field type */
    errorCode=U_ZERO_ERROR;
    conflict = udatpg_addPattern(dtpg, testPattern2, 7, FALSE, result, 20,
                                 &length, &errorCode);
    if((conflict != UDATPG_NO_CONFLICT)||U_FAILURE(errorCode)) {
        log_err("udatpg_addPattern() failed to add HH:mm v. - %s\n", u_errorName(errorCode));
        return;
    }
    length = udatpg_replaceFieldTypes(dtpg, testPattern2, 7, replacedStr, 4,
                                      result, 20, &errorCode);
    if (U_FAILURE(errorCode) || (length==0) ) {
        log_err("udatpg_replaceFieldTypes failed!\n");
        return;
    }
    
    /* Get all skeletons and the crroespong pattern for each skeleton. */
    ptrResult[0] = testPattern2;
    ptrResult[1] = redundantPattern; 
    count=0;
    en = udatpg_openSkeletons(dtpg, &errorCode);  
    if (U_FAILURE(errorCode) || (length==0) ) {
        log_err("udatpg_openSkeletons failed!\n");
        return;
    }
    while ( (s=uenum_unext(en, &length, &errorCode))!= NULL) {
        p = udatpg_getPatternForSkeleton(dtpg, s, length, &pLength);
        if (U_FAILURE(errorCode) || p==NULL || u_memcmp(p, ptrResult[count], pLength)!=0 ) {
            log_err("udatpg_getPatternForSkeleton failed!\n");
            return;
        }
        count++;
    }
    uenum_close(en);
    
    /* Get all baseSkeletons */
    en = udatpg_openBaseSkeletons(dtpg, &errorCode);
    count=0;
    while ( (s=uenum_unext(en, &length, &errorCode))!= NULL) {
        p = udatpg_getPatternForSkeleton(dtpg, s, length, &pLength);
        if (U_FAILURE(errorCode) || p==NULL || u_memcmp(p, resultBaseSkeletons[count], pLength)!=0 ) {
            log_err("udatpg_getPatternForSkeleton failed!\n");
            return;
        }
        count++;
    }
    if (U_FAILURE(errorCode) || (length==0) ) {
        log_err("udatpg_openSkeletons failed!\n");
        return;
    }
    uenum_close(en);
    
    udatpg_close(dtpg);
    
    /* sample code in Userguide */
    patternCapacity = (int32_t)(sizeof(pattern)/sizeof((pattern)[0]));
    status=U_ZERO_ERROR;
    generator=udatpg_open(locale, &status);
    if(U_FAILURE(status)) {
        return;
    }

    /* get a pattern for an abbreviated month and day */
    length = udatpg_getBestPattern(generator, skeleton, 4,
                                   pattern, patternCapacity, &status);
    formatter = udat_open(UDAT_IGNORE, UDAT_DEFAULT, locale, timeZoneGMT, -1,
                          pattern, length, &status);
    if (formatter==NULL) {
        log_err("Failed to initialize the UDateFormat of the sample code in Userguide.\n");
        udatpg_close(generator);
        return;
    }

    /* use it to format (or parse) */
    formattedCapacity = (int32_t)(sizeof(formatted)/sizeof((formatted)[0]));
    resultLen=udat_format(formatter, ucal_getNow(), formatted, formattedCapacity,
                          NULL, &status);
    /* for French, the result is "13 sept." */

    /* cannot use the result from ucal_getNow() because the value change evreyday. */
    resultLen=udat_format(formatter, sampleDate, formatted, formattedCapacity,
                          NULL, &status);
    if ( u_memcmp(sampleFormatted, formatted, resultLen) != 0 ) {
        log_err("Failed udat_format() of sample code in Userguide.\n");
    }
    udatpg_close(generator);
    udat_close(formatter);
}
/*
 * Exercises the basic udatpg_ getters and setters on a generator opened for
 * "fi": best pattern, skeleton, base skeleton, append item format/name and
 * the date-time format. Each call is also tried with a NULL input string of
 * length 0.
 *
 * The generator is closed on every path via the cleanup label.
 */
static void TestUsage() {
    UErrorCode errorCode=U_ZERO_ERROR;
    UDateTimePatternGenerator *dtpg;
    UChar bestPattern[20];
    UChar result[20];
    int32_t length;    
    UChar *s;
    const UChar *r;
    
    dtpg=udatpg_open("fi", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "udatpg_open(fi) failed - %s\n", u_errorName(errorCode));
        return;
    }
    length = udatpg_getBestPattern(dtpg, testSkeleton1, 4,
                                   bestPattern, 20, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_getBestPattern failed - %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    if((u_memcmp(bestPattern, expectingBestPattern, length)!=0) || bestPattern[length]!=0) { 
        log_err("udatpg_getBestPattern did not return the expected string\n");
        goto cleanup;
    }
    
    
    /* Test skeleton == NULL */
    s=NULL;
    length = udatpg_getBestPattern(dtpg, s, 0, bestPattern, 20, &errorCode);
    if(!U_FAILURE(errorCode)&&(length!=0) ) {
        log_err("udatpg_getBestPattern failed in illegal argument - skeleton is NULL.\n");
        goto cleanup;
    }
    
    /* Test udatpg_getSkeleton */
    length = udatpg_getSkeleton(dtpg, testPattern, 5, result, 20,  &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_getSkeleton failed - %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    if((u_memcmp(result, expectingSkeleton, length)!=0) || result[length]!=0) { 
        log_err("udatpg_getSkeleton did not return the expected string\n");
        goto cleanup;
    }
    
    /* Test pattern == NULL */
    s=NULL;
    length = udatpg_getSkeleton(dtpg, s, 0, result, 20, &errorCode);
    if(!U_FAILURE(errorCode)&&(length!=0) ) {
        log_err("udatpg_getSkeleton failed in illegal argument - pattern is NULL.\n");
        goto cleanup;
    }    
    
    /* Test udatpg_getBaseSkeleton */
    length = udatpg_getBaseSkeleton(dtpg, testPattern, 5, result, 20,  &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("udatpg_getBaseSkeleton failed - %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    if((u_memcmp(result, expectingBaseSkeleton, length)!=0) || result[length]!=0) { 
        log_err("udatpg_getBaseSkeleton did not return the expected string\n");
        goto cleanup;
    }
    
    /* Test pattern == NULL */
    s=NULL;
    length = udatpg_getBaseSkeleton(dtpg, s, 0, result, 20, &errorCode);
    if(!U_FAILURE(errorCode)&&(length!=0) ) {
        log_err("udatpg_getBaseSkeleton failed in illegal argument - pattern is NULL.\n");
        goto cleanup;
    }
    
    /* set append format to {1}{0} */
    udatpg_setAppendItemFormat( dtpg, UDATPG_MONTH_FIELD, testFormat, 7 );
    r = udatpg_getAppendItemFormat(dtpg, UDATPG_MONTH_FIELD, &length);
    
    /* the length is checked first so that the comparison stays within the 7 UChars that were set */
    if(length!=7 || 0!=u_memcmp(r, testFormat, length) || r[length]!=0) { 
        log_err("udatpg_setAppendItemFormat did not return the expected string\n");
        goto cleanup;
    }
    
    /* set append name to hr */
    udatpg_setAppendItemName( dtpg, UDATPG_HOUR_FIELD, appendItemName, 7 );
    r = udatpg_getAppendItemName(dtpg, UDATPG_HOUR_FIELD, &length);
    
    if(length!=7 || 0!=u_memcmp(r, appendItemName, length) || r[length]!=0) { 
        log_err("udatpg_setAppendItemName did not return the expected string\n");
        goto cleanup;
    }
    
    /* set date time format to {1}{0} */
    udatpg_setDateTimeFormat( dtpg, testFormat, 7 );
    r = udatpg_getDateTimeFormat(dtpg, &length);
    
    if(length!=7 || 0!=u_memcmp(r, testFormat, length) || r[length]!=0) { 
        log_err("udatpg_setDateTimeFormat did not return the expected string\n");
        goto cleanup;
    }

cleanup:
    udatpg_close(dtpg);
}
Example #8
0
/*
 * Try titlecasing with options.
 *
 * Titlecases one fixed string with a UCaseMap in four configurations:
 * default break iterator, U_TITLECASE_NO_BREAK_ADJUSTMENT, a sentence break
 * iterator with U_TITLECASE_NO_LOWERCASE (preflight and real call), and the
 * same through the UTF-8 function. Each result is compared with its expected
 * array, including length and NUL termination.
 *
 * The UCaseMap is closed on every path after it was opened successfully.
 */
static void
TestUCaseMapToTitle(void) {
    /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */
    /*
     * Note: The sentence BreakIterator does not recognize a '.'
     * as a sentence terminator if it is followed by lowercase.
     * That is why the example has the '!'.
     */
    static const UChar

    beforeTitle[]=      { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e },
    titleWord[]=        { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e },
    titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e },
    titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e };

    UChar buffer[32];
    UCaseMap *csm;
    UBreakIterator *sentenceIter;
    const UBreakIterator *iter;
    int32_t length;
    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    csm=ucasemap_open("", 0, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode));
        return;
    }

    iter=ucasemap_getBreakIterator(csm);
    if(iter!=NULL) {
        log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter);
    }

    /* Use default UBreakIterator: Word breaks. */
    length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
    if( U_FAILURE(errorCode) ||
        length!=UPRV_LENGTHOF(titleWord) ||
        0!=u_memcmp(buffer, titleWord, length) ||
        buffer[length]!=0
    ) {
        log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
    }
    if (U_SUCCESS(errorCode)) {
        iter=ucasemap_getBreakIterator(csm);
        if(iter==NULL) {
            log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n");
        }
    }

    /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. */
    ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode));
        ucasemap_close(csm);
        return;
    }

    length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
    if( U_FAILURE(errorCode) ||
        length!=UPRV_LENGTHOF(titleWordNoAdjust) ||
        0!=u_memcmp(buffer, titleWordNoAdjust, length) ||
        buffer[length]!=0
    ) {
        log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
    }

    /* Set a sentence break iterator. */
    errorCode=U_ZERO_ERROR;
    sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode));
        ucasemap_close(csm);
        return;
    }
    ucasemap_setBreakIterator(csm, sentenceIter, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode));
        ubrk_close(sentenceIter);
        ucasemap_close(csm);
        return;
    }
    /*
     * From here on sentenceIter is not closed separately, only csm is
     * (presumably the UCaseMap owns the iterator once it was set - see the
     * ucasemap_setBreakIterator() documentation).
     */
    iter=ucasemap_getBreakIterator(csm);
    if(iter!=sentenceIter) {
        log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter);
    }

    ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode));
        ucasemap_close(csm);
        return;
    }

    /* Use the sentence break iterator with the option. Preflight first. */
    length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
    if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
        length!=UPRV_LENGTHOF(titleSentNoLower)
    ) {
        log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
    }

    /* the preflight call is expected to leave U_BUFFER_OVERFLOW_ERROR behind */
    errorCode=U_ZERO_ERROR;
    buffer[0]=0;
    length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
    if( U_FAILURE(errorCode) ||
        length!=UPRV_LENGTHOF(titleSentNoLower) ||
        0!=u_memcmp(buffer, titleSentNoLower, length) ||
        buffer[length]!=0
    ) {
        log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
    }

    /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */
    {
        char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64];
        int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength;

        errorCode=U_ZERO_ERROR;
        u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode);
        u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode);

        length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode);
        if( U_FAILURE(errorCode) ||
            length!=utf8TitleSentNoLowerLength ||
            0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) ||
            utf8[length]!=0
        ) {
            log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode));
        }
    }

    ucasemap_close(csm);
}
/*
 * Comparison callback passed to uprv_sortArray(): orders two unfold rows by
 * comparing all UGENCASE_UNFOLD_WIDTH UChars of each row with u_memcmp().
 * The context argument is not used.
 */
static int32_t U_CALLCONV
compareUnfold(const void *context, const void *left, const void *right) {
    const UChar *leftRow=(const UChar *)left;
    const UChar *rightRow=(const UChar *)right;

    (void)context;
    return u_memcmp(leftRow, rightRow, UGENCASE_UNFOLD_WIDTH);
}
Example #10
0
/* {{{ grapheme_strrpos_utf16 - strrpos using utf16
 *
 * Converts haystack and needle from UTF-8 to UTF-16 (case-folding both when
 * f_ignore_case is non-zero), then walks the break iterator backwards from
 * its last boundary and compares the needle at each boundary.
 *
 * Returns the value of grapheme_count_graphemes() for the match position,
 * or -1 if the needle was not found, a conversion failed, or
 * grapheme_get_haystack_offset() rejected the offset.
 *
 * NOTE(review): efree() is called on uhaystack/uneedle in the conversion
 * failure paths, where they may still be NULL - confirm that efree()
 * tolerates NULL.
 */
int
grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
{
    UChar *uhaystack, *puhaystack, *uhaystack_end, *uneedle;
    int32_t uhaystack_len, uneedle_len;
    UErrorCode status;
    unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
    UBreakIterator* bi = NULL;
    int ret_pos, pos;

    /* convert the strings to UTF-16. */
    uhaystack = NULL;
    uhaystack_len = 0;
    status = U_ZERO_ERROR;
    intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );

    if ( U_FAILURE( status ) ) {
        /* Set global error code. */
        intl_error_set_code( NULL, status TSRMLS_CC );

        /* Set error messages. */
        intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
        efree( uhaystack );
        return -1;
    }

    /* NOTE(review): the status set by the case folding is not checked */
    if ( f_ignore_case ) {
        grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );
    }

    /* get a pointer to the haystack taking into account the offset */
    bi = NULL;
    status = U_ZERO_ERROR;
    /* NOTE(review): status is not checked after obtaining the break iterator */
    bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );

    /* the result is only tested for NULL here; puhaystack is reassigned before the search below */
    puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);

    if ( NULL == puhaystack ) {
        /* NOTE(review): the message names grapheme_strpos although this is the strrpos variant */
        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
        efree( uhaystack );
        ubrk_close (bi);
        return -1;
    }

    uneedle = NULL;
    uneedle_len = 0;
    status = U_ZERO_ERROR;
    intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );

    if ( U_FAILURE( status ) ) {
        /* Set global error code. */
        intl_error_set_code( NULL, status TSRMLS_CC );

        /* Set error messages. */
        intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
        efree( uhaystack );
        efree( uneedle );
        ubrk_close (bi);
        return -1;
    }

    if ( f_ignore_case ) {
        grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
    }

    ret_pos = -1;   /* -1 represents 'not found' */

    /* back up until there's needle_len characters to compare */

    uhaystack_end = uhaystack + uhaystack_len;
    /* presumably the iterator's text was set by grapheme_get_haystack_offset() - verify */
    pos = ubrk_last(bi);
    puhaystack = uhaystack + pos;

    while ( uhaystack_end - puhaystack < uneedle_len ) {

        pos = ubrk_previous(bi);

        if ( UBRK_DONE == pos ) {
            break;
        }

        puhaystack = uhaystack + pos;
    }

    /* is there enough haystack left to hold the needle? */
    if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
        /* not enough, not found */
        goto exit;
    }

    /* try each boundary from right to left until the iterator is exhausted */
    while ( UBRK_DONE != pos ) {

        if (!u_memcmp(uneedle, puhaystack, uneedle_len)) {  /* needle_len - 1 in zend memnstr? */

            /* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */

            if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {

                /* found it, get grapheme count offset */
                ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
                break;
            }

            /* set position back */
            ubrk_isBoundary(bi, pos);
        }

        pos = ubrk_previous(bi);
        /* when pos is UBRK_DONE this pointer is not dereferenced: the loop condition ends the search */
        puhaystack = uhaystack + pos;
    }

exit:
    efree( uhaystack );
    efree( uneedle );
    ubrk_close (bi);

    return ret_pos;
}
Example #11
0
/*
 * Enumerates all transliterator IDs via utrans_openIDs(). The first 10 IDs
 * are opened with utrans_openU(), and utrans_getUnicodeID() must return the
 * ID that was used to open the transliterator. Afterwards the enumeration is
 * reset and uenum_count() must agree with the number of IDs that
 * uenum_unext() delivered.
 */
static void TestUnicodeIDs() {
    UEnumeration *uenum;
    UTransliterator *utrans;
    const UChar *id, *id2;
    int32_t idLength, id2Length, count, count2;

    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    uenum=utrans_openIDs(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
        return;
    }

    count=uenum_count(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
    }

    /* from here on count is the number of IDs actually returned by uenum_unext() */
    count=0;
    for(;;) {
        id=uenum_unext(uenum, &idLength, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
            break;
        }
        if(id==NULL) {
            break;
        }

        if(++count>10) {
            /* try to actually open only a few transliterators */
            continue;
        }

        utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
            /*
             * The failure has been reported. Clear it so that it is not
             * reported a second time as an enumeration failure by the next
             * uenum_unext() call.
             */
            errorCode=U_ZERO_ERROR;
            continue;
        }

        id2=utrans_getUnicodeID(utrans, &id2Length);
        if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
            log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
        }

        utrans_close(utrans);
    }

    uenum_reset(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
    } else {
        count2=uenum_count(uenum, &errorCode);
        /* check the value that was just retrieved (count2), not the loop counter */
        if(U_FAILURE(errorCode) || count2<1) {
            log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
        } else if(count!=count2) {
            log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
        }
    }

    uenum_close(uenum);
}
/*
 * Finalizes the file-level unfold[] table.
 *
 * The table is treated as rows of UGENCASE_UNFOLD_WIDTH UChars. Row 0 is
 * skipped by the sort and the merge, and receives the final row count at
 * index UCASE_UNFOLD_ROWS. In every other row the first
 * UGENCASE_UNFOLD_STRING_WIDTH UChars are compared as the row's string and
 * the following UGENCASE_UNFOLD_CP_WIDTH UChars hold code points
 * (0 = unused slot).
 *
 * Steps: add the conditional U+0130 folding, sort the data rows, merge
 * adjacent rows with the same string by concatenating their code point
 * columns (updating unfoldRows and unfoldTop), store the row count, and
 * optionally print the rows.
 *
 * Exits the program if sorting fails or if a merged row would need more than
 * UGENCASE_UNFOLD_CP_WIDTH code points.
 */
static void
makeUnfoldData() {
    static const UChar
        iDot[2]=        { 0x69, 0x307 };

    UChar *p, *q;
    int32_t i, j, k;
    UErrorCode errorCode;

    /*
     * add a case folding that we missed because it's conditional:
     * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
     */
    addUnfolding(0x130, iDot, 2);

    /* sort the data; the element size is in bytes (UGENCASE_UNFOLD_WIDTH UChars of 2 bytes each) */
    errorCode=U_ZERO_ERROR;
    uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        /* merging relies on equal strings being adjacent, so unsorted data must not be processed */
        fprintf(stderr, "gencase error: sorting unfold[] failed with error code %d\n", (int)errorCode);
        exit(errorCode);
    }

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;

    /* i is not advanced after a merge so that the merged row is compared with its new successor */
    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            /* find the first unused (zero) code point slot of the current row; slot 0 is taken as used */
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            /* append the next row's code points; j is not bounded here, the overflow check follows */
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
                        (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                exit(U_BUFFER_OVERFLOW_ERROR);
            }

            /* move following rows up one */
            --unfoldRows;
            unfoldTop-=UGENCASE_UNFOLD_WIDTH;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    /* store the number of remaining data rows in row 0 */
    unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=(UChar *)unfold;
        for(i=0; i<unfoldRows; ++i) {
            /* advance first: row 0 is not printed */
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }
}