/*
 * Test one string with the ICU and the reference BOCU-1 implementations.
 *
 * Steps:
 *  1. Encode text with the reference writeString() and with
 *     ucnv_fromUChars(bocu1, ...); both byte sequences must be identical.
 *  2. Decode with the reference readString() and with ucnv_toUChars();
 *     the reference output must equal the original text, and the ICU output
 *     must equal the reference output.
 *
 * @param bocu1  converter passed to ucnv_fromUChars()/ucnv_toUChars()
 * @param number index of the test string, used only in log messages
 * @param text   UTF-16 input
 * @param length number of UChars in text
 *
 * Failures are reported with log_err(); all work buffers are released on
 * every path through the cleanup label.
 */
static void
roundtripBOCU1(UConverter *bocu1, int32_t number, const UChar *text, int32_t length) {
    UChar *roundtripRef, *roundtripICU;
    char *bocu1Ref, *bocu1ICU;

    int32_t bocu1RefLength, bocu1ICULength, roundtripRefLength, roundtripICULength;
    UErrorCode errorCode;

    roundtripRef = (UChar *)malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
    roundtripICU = (UChar *)malloc(DEFAULT_BUFFER_SIZE * sizeof(UChar));
    bocu1Ref = (char *)malloc(DEFAULT_BUFFER_SIZE);
    bocu1ICU = (char *)malloc(DEFAULT_BUFFER_SIZE);

    /* do not write through a NULL buffer; free(NULL) in cleanup is harmless */
    if(roundtripRef==NULL || roundtripICU==NULL || bocu1Ref==NULL || bocu1ICU==NULL) {
        log_err("roundtripBOCU1(text(%d)[%d]): out of memory allocating work buffers\n", number, length);
        goto cleanup;
    }

    /* Unicode -> BOCU-1 */
    bocu1RefLength=writeString(text, length, (uint8_t *)bocu1Ref);

    errorCode=U_ZERO_ERROR;
    bocu1ICULength=ucnv_fromUChars(bocu1,
                        bocu1ICU, DEFAULT_BUFFER_SIZE,
                        text, length,
                        &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ucnv_fromUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
        goto cleanup;
    }

    if(bocu1RefLength!=bocu1ICULength || 0!=uprv_memcmp(bocu1Ref, bocu1ICU, bocu1RefLength)) {
        log_err("Unicode(%d)[%d] -> BOCU-1: reference[%d]!=ICU[%d]\n", number, length, bocu1RefLength, bocu1ICULength);
        goto cleanup;
    }

    /* BOCU-1 -> Unicode */
    roundtripRefLength=readString((uint8_t *)bocu1Ref, bocu1RefLength, roundtripRef);
    if(roundtripRefLength<0) {
        goto cleanup; /* readString() found an error and reported it */
    }

    roundtripICULength=ucnv_toUChars(bocu1,
                        roundtripICU, DEFAULT_BUFFER_SIZE,
                        bocu1ICU, bocu1ICULength,
                        &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("ucnv_toUChars(BOCU-1, text(%d)[%d]) failed: %s\n", number, length, u_errorName(errorCode));
        goto cleanup;
    }

    /* the reference decoder must reproduce the original text ... */
    if(length!=roundtripRefLength || 0!=u_memcmp(text, roundtripRef, length)) {
        log_err("BOCU-1 -> Unicode: original(%d)[%d]!=reference[%d]\n", number, length, roundtripRefLength);
        goto cleanup;
    }
    /* ... and ICU must agree with the reference decoder */
    if(roundtripRefLength!=roundtripICULength || 0!=u_memcmp(roundtripRef, roundtripICU, roundtripRefLength)) {
        log_err("BOCU-1 -> Unicode: reference(%d)[%d]!=ICU[%d]\n", number, roundtripRefLength, roundtripICULength);
        goto cleanup;
    }

cleanup:
    free(roundtripRef);
    free(roundtripICU);
    free(bocu1Ref);
    free(bocu1ICU);
}
/*
 * Sort the rows of the unfold table and merge adjacent rows that carry the
 * same string, so that each string appears in exactly one row.
 *
 * The table lives in the member `unfold` and is edited in place through
 * getBuffer()/releaseBuffer(). Rows are UGENCASE_UNFOLD_WIDTH UChars wide;
 * the first UGENCASE_UNFOLD_STRING_WIDTH units of a row are compared as the
 * string and the following columns hold code points. The first row is not
 * sorted; the final row count is stored at index UCASE_UNFOLD_ROWS.
 *
 * @param errorCode in/out error code. Nothing is done if it already indicates
 *                  a failure. Set to U_BUFFER_OVERFLOW_ERROR when a merged
 *                  row would need more than UGENCASE_UNFOLD_CP_WIDTH code
 *                  point columns, and to U_MEMORY_ALLOCATION_ERROR when no
 *                  writable buffer could be obtained.
 */
void
CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }

    UChar *p, *q;
    int32_t i, j, k;

    /* sort the data */
    int32_t unfoldLength=unfold.length();
    int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
    UChar *unfoldBuffer=unfold.getBuffer(-1);
    if(unfoldBuffer==NULL) {
        /* no writable buffer: nothing was opened, so nothing to release */
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
                   compareUnfold, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        /* do not merge unsorted rows; hand the buffer back with its length unchanged */
        unfold.releaseBuffer(unfoldLength);
        return;
    }

    /* make unique-string rows by merging adjacent ones' code point columns */

    /* make p point to row i-1 */
    p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;

    for(i=1; i<unfoldRows;) {
        if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
            /* concatenate code point columns */
            q=p+UGENCASE_UNFOLD_STRING_WIDTH;
            /* find the first free (zero) code point column of row i-1 */
            for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
            /* append the code points of row i */
            for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
                q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
            }
            if(j>UGENCASE_UNFOLD_CP_WIDTH) {
                fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n", (long)j, UGENCASE_UNFOLD_CP_WIDTH);
                errorCode=U_BUFFER_OVERFLOW_ERROR;
                /* close the buffer opened by getBuffer() before bailing out */
                unfold.releaseBuffer(unfoldLength);
                return;
            }

            /* move following rows up one */
            --unfoldRows;
            u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
        } else {
            p+=UGENCASE_UNFOLD_WIDTH;
            ++i;
        }
    }

    unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;

    if(beVerbose) {
        puts("unfold data:");

        p=unfoldBuffer;
        for(i=0; i<unfoldRows; ++i) {
            p+=UGENCASE_UNFOLD_WIDTH;
            printf("[%2d] %04x %04x %04x <- %04x %04x\n",
                   (int)i, p[0], p[1], p[2], p[3], p[4]);
        }
    }

    /* the first row plus the remaining merged rows */
    unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
}
/*
 * Equality for linear-match nodes: the same object, or equal according to
 * LinearMatchNode::operator== and with the same `length` units in `s`.
 */
UBool
UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
    // Comparing a node with itself needs no further work.
    if(&other==this) {
        return TRUE;
    }
    // Only look at the units once the LinearMatchNode comparison agrees.
    if(LinearMatchNode::operator==(other)) {
        const UCTLinearMatchNode &that=static_cast<const UCTLinearMatchNode &>(other);
        return u_memcmp(s, that.s, length)==0;
    }
    return FALSE;
}
static void TestOpenClose() { UErrorCode errorCode=U_ZERO_ERROR; UDateTimePatternGenerator *dtpg, *dtpg2; const UChar *s; int32_t length; /* Open a DateTimePatternGenerator for the default locale. */ dtpg=udatpg_open(NULL, &errorCode); if(U_FAILURE(errorCode)) { log_err_status(errorCode, "udatpg_open(NULL) failed - %s\n", u_errorName(errorCode)); return; } udatpg_close(dtpg); /* Now one for German. */ dtpg=udatpg_open("de", &errorCode); if(U_FAILURE(errorCode)) { log_err("udatpg_open(de) failed - %s\n", u_errorName(errorCode)); return; } /* Make some modification which we verify gets passed on to the clone. */ udatpg_setDecimal(dtpg, pipeString, 1); /* Clone the generator. */ dtpg2=udatpg_clone(dtpg, &errorCode); if(U_FAILURE(errorCode) || dtpg2==NULL) { log_err("udatpg_clone() failed - %s\n", u_errorName(errorCode)); return; } /* Verify that the clone has the custom decimal symbol. */ s=udatpg_getDecimal(dtpg2, &length); if(s==pipeString || length!=1 || 0!=u_memcmp(s, pipeString, length) || s[length]!=0) { log_err("udatpg_getDecimal(cloned object) did not return the expected string\n"); return; } udatpg_close(dtpg); udatpg_close(dtpg2); }
/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries
 *
 * Scans [haystack, end) for the code units of needle. A code-unit match only
 * counts when ubrk_isBoundary() reports a boundary right after the match.
 *
 * bi          break iterator; its text is (re)set to the haystack here
 * haystack    start of the UTF-16 text to search
 * needle      UTF-16 text to look for
 * needle_len  number of UChars in needle
 * end         one past the last UChar of the haystack
 *
 * Returns the value of grapheme_count_graphemes(bi, haystack, offset of the
 * match) on success, or -1 when there is no match, the needle is empty, or
 * the break iterator text could not be set.
 */
inline int32_t
grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
{
	UChar *p = haystack;
	UChar ne;
	UErrorCode status;
	int32_t grapheme_offset;

	/* an empty needle has no last code unit; reading needle[-1] would be out of bounds */
	if (needle_len <= 0) {
		return -1;
	}
	ne = needle[needle_len-1];

	/* last position at which a full needle still fits */
	end -= needle_len;

	while (p <= end) {
		/* jump to the next occurrence of the needle's first code unit */
		p = u_memchr(p, *needle, (end-p+1));
		if (p == NULL) {
			return -1;
		}

		/* cheap check of the last code unit before comparing the rest */
		if (ne == p[needle_len-1] && !u_memcmp(needle, p, needle_len - 1)) {
			/* needle_len - 1 works because if needle_len is 1, we've already tested the char */

			/* does the grapheme end here? */
			status = U_ZERO_ERROR;
			ubrk_setText (bi, haystack, (end - haystack) + needle_len, &status);
			if ( U_FAILURE(status) ) {
				/* the iterator has no usable text; boundaries cannot be checked */
				return -1;
			}

			if ( ubrk_isBoundary (bi, (p - haystack) + needle_len) ) {
				/* found it, get grapheme count offset */
				grapheme_offset = grapheme_count_graphemes(bi, haystack, (p - haystack));

				return grapheme_offset;
			}
		}

		p++;
	}

	return -1;
}
static void TestBuilder() { UErrorCode errorCode=U_ZERO_ERROR; UDateTimePatternGenerator *dtpg; UDateTimePatternConflict conflict; UEnumeration *en; UChar result[20]; int32_t length, pLength; const UChar *s, *p; const UChar* ptrResult[2]; int32_t count=0; UDateTimePatternGenerator *generator; int32_t formattedCapacity, resultLen,patternCapacity ; UChar pattern[40], formatted[40]; UDateFormat *formatter; UDate sampleDate = 837039928046.0; static const char locale[]= "fr"; UErrorCode status=U_ZERO_ERROR; /* test create an empty DateTimePatternGenerator */ dtpg=udatpg_openEmpty(&errorCode); if(U_FAILURE(errorCode)) { log_err("udatpg_openEmpty() failed - %s\n", u_errorName(errorCode)); return; } /* Add a pattern */ conflict = udatpg_addPattern(dtpg, redundantPattern, 5, FALSE, result, 20, &length, &errorCode); if(U_FAILURE(errorCode)) { log_err("udatpg_addPattern() failed - %s\n", u_errorName(errorCode)); return; } /* Add a redundant pattern */ conflict = udatpg_addPattern(dtpg, redundantPattern, 5, FALSE, result, 20, &length, &errorCode); if(conflict == UDATPG_NO_CONFLICT) { log_err("udatpg_addPattern() failed to find the duplicate pattern.\n"); return; } /* Test pattern == NULL */ s=NULL; length = udatpg_addPattern(dtpg, s, 0, FALSE, result, 20, &length, &errorCode); if(!U_FAILURE(errorCode)&&(length!=0) ) { log_err("udatpg_addPattern failed in illegal argument - pattern is NULL.\n"); return; } /* replace field type */ errorCode=U_ZERO_ERROR; conflict = udatpg_addPattern(dtpg, testPattern2, 7, FALSE, result, 20, &length, &errorCode); if((conflict != UDATPG_NO_CONFLICT)||U_FAILURE(errorCode)) { log_err("udatpg_addPattern() failed to add HH:mm v. - %s\n", u_errorName(errorCode)); return; } length = udatpg_replaceFieldTypes(dtpg, testPattern2, 7, replacedStr, 4, result, 20, &errorCode); if (U_FAILURE(errorCode) || (length==0) ) { log_err("udatpg_replaceFieldTypes failed!\n"); return; } /* Get all skeletons and the crroespong pattern for each skeleton. 
*/ ptrResult[0] = testPattern2; ptrResult[1] = redundantPattern; count=0; en = udatpg_openSkeletons(dtpg, &errorCode); if (U_FAILURE(errorCode) || (length==0) ) { log_err("udatpg_openSkeletons failed!\n"); return; } while ( (s=uenum_unext(en, &length, &errorCode))!= NULL) { p = udatpg_getPatternForSkeleton(dtpg, s, length, &pLength); if (U_FAILURE(errorCode) || p==NULL || u_memcmp(p, ptrResult[count], pLength)!=0 ) { log_err("udatpg_getPatternForSkeleton failed!\n"); return; } count++; } uenum_close(en); /* Get all baseSkeletons */ en = udatpg_openBaseSkeletons(dtpg, &errorCode); count=0; while ( (s=uenum_unext(en, &length, &errorCode))!= NULL) { p = udatpg_getPatternForSkeleton(dtpg, s, length, &pLength); if (U_FAILURE(errorCode) || p==NULL || u_memcmp(p, resultBaseSkeletons[count], pLength)!=0 ) { log_err("udatpg_getPatternForSkeleton failed!\n"); return; } count++; } if (U_FAILURE(errorCode) || (length==0) ) { log_err("udatpg_openSkeletons failed!\n"); return; } uenum_close(en); udatpg_close(dtpg); /* sample code in Userguide */ patternCapacity = (int32_t)(sizeof(pattern)/sizeof((pattern)[0])); status=U_ZERO_ERROR; generator=udatpg_open(locale, &status); if(U_FAILURE(status)) { return; } /* get a pattern for an abbreviated month and day */ length = udatpg_getBestPattern(generator, skeleton, 4, pattern, patternCapacity, &status); formatter = udat_open(UDAT_IGNORE, UDAT_DEFAULT, locale, timeZoneGMT, -1, pattern, length, &status); if (formatter==NULL) { log_err("Failed to initialize the UDateFormat of the sample code in Userguide.\n"); udatpg_close(generator); return; } /* use it to format (or parse) */ formattedCapacity = (int32_t)(sizeof(formatted)/sizeof((formatted)[0])); resultLen=udat_format(formatter, ucal_getNow(), formatted, formattedCapacity, NULL, &status); /* for French, the result is "13 sept." */ /* cannot use the result from ucal_getNow() because the value change evreyday. 
*/ resultLen=udat_format(formatter, sampleDate, formatted, formattedCapacity, NULL, &status); if ( u_memcmp(sampleFormatted, formatted, resultLen) != 0 ) { log_err("Failed udat_format() of sample code in Userguide.\n"); } udatpg_close(generator); udat_close(formatter); }
static void TestUsage() { UErrorCode errorCode=U_ZERO_ERROR; UDateTimePatternGenerator *dtpg; UChar bestPattern[20]; UChar result[20]; int32_t length; UChar *s; const UChar *r; dtpg=udatpg_open("fi", &errorCode); if(U_FAILURE(errorCode)) { log_err_status(errorCode, "udatpg_open(fi) failed - %s\n", u_errorName(errorCode)); return; } length = udatpg_getBestPattern(dtpg, testSkeleton1, 4, bestPattern, 20, &errorCode); if(U_FAILURE(errorCode)) { log_err("udatpg_getBestPattern failed - %s\n", u_errorName(errorCode)); return; } if((u_memcmp(bestPattern, expectingBestPattern, length)!=0) || bestPattern[length]!=0) { log_err("udatpg_getBestPattern did not return the expected string\n"); return; } /* Test skeleton == NULL */ s=NULL; length = udatpg_getBestPattern(dtpg, s, 0, bestPattern, 20, &errorCode); if(!U_FAILURE(errorCode)&&(length!=0) ) { log_err("udatpg_getBestPattern failed in illegal argument - skeleton is NULL.\n"); return; } /* Test udatpg_getSkeleton */ length = udatpg_getSkeleton(dtpg, testPattern, 5, result, 20, &errorCode); if(U_FAILURE(errorCode)) { log_err("udatpg_getSkeleton failed - %s\n", u_errorName(errorCode)); return; } if((u_memcmp(result, expectingSkeleton, length)!=0) || result[length]!=0) { log_err("udatpg_getSkeleton did not return the expected string\n"); return; } /* Test pattern == NULL */ s=NULL; length = udatpg_getSkeleton(dtpg, s, 0, result, 20, &errorCode); if(!U_FAILURE(errorCode)&&(length!=0) ) { log_err("udatpg_getSkeleton failed in illegal argument - pattern is NULL.\n"); return; } /* Test udatpg_getBaseSkeleton */ length = udatpg_getBaseSkeleton(dtpg, testPattern, 5, result, 20, &errorCode); if(U_FAILURE(errorCode)) { log_err("udatpg_getBaseSkeleton failed - %s\n", u_errorName(errorCode)); return; } if((u_memcmp(result, expectingBaseSkeleton, length)!=0) || result[length]!=0) { log_err("udatpg_getBaseSkeleton did not return the expected string\n"); return; } /* Test pattern == NULL */ s=NULL; length = udatpg_getBaseSkeleton(dtpg, s, 
0, result, 20, &errorCode); if(!U_FAILURE(errorCode)&&(length!=0) ) { log_err("udatpg_getBaseSkeleton failed in illegal argument - pattern is NULL.\n"); return; } /* set append format to {1}{0} */ udatpg_setAppendItemFormat( dtpg, UDATPG_MONTH_FIELD, testFormat, 7 ); r = udatpg_getAppendItemFormat(dtpg, UDATPG_MONTH_FIELD, &length); if(length!=7 || 0!=u_memcmp(r, testFormat, length) || r[length]!=0) { log_err("udatpg_setAppendItemFormat did not return the expected string\n"); return; } /* set append name to hr */ udatpg_setAppendItemName( dtpg, UDATPG_HOUR_FIELD, appendItemName, 7 ); r = udatpg_getAppendItemName(dtpg, UDATPG_HOUR_FIELD, &length); if(length!=7 || 0!=u_memcmp(r, appendItemName, length) || r[length]!=0) { log_err("udatpg_setAppendItemName did not return the expected string\n"); return; } /* set date time format to {1}{0} */ udatpg_setDateTimeFormat( dtpg, testFormat, 7 ); r = udatpg_getDateTimeFormat(dtpg, &length); if(length!=7 || 0!=u_memcmp(r, testFormat, length) || r[length]!=0) { log_err("udatpg_setDateTimeFormat did not return the expected string\n"); return; } udatpg_close(dtpg); }
/* Try titlecasing with options. */ static void TestUCaseMapToTitle(void) { /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */ /* * Note: The sentence BreakIterator does not recognize a '.' * as a sentence terminator if it is followed by lowercase. * That is why the example has the '!'. */ static const UChar beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e }, titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e }, titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e }, titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e }; UChar buffer[32]; UCaseMap *csm; UBreakIterator *sentenceIter; const UBreakIterator *iter; int32_t length; UErrorCode errorCode; errorCode=U_ZERO_ERROR; csm=ucasemap_open("", 0, &errorCode); if(U_FAILURE(errorCode)) { log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode)); return; } iter=ucasemap_getBreakIterator(csm); if(iter!=NULL) { log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter); } /* Use default UBreakIterator: Word breaks. */ length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); if( U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(titleWord) || 0!=u_memcmp(buffer, titleWord, length) || buffer[length]!=0 ) { log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); } if (U_SUCCESS(errorCode)) { iter=ucasemap_getBreakIterator(csm); if(iter==NULL) { log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n"); } } /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. 
*/ ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode); if(U_FAILURE(errorCode)) { log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode)); return; } length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); if( U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(titleWordNoAdjust) || 0!=u_memcmp(buffer, titleWordNoAdjust, length) || buffer[length]!=0 ) { log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); } /* Set a sentence break iterator. */ errorCode=U_ZERO_ERROR; sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode); if(U_FAILURE(errorCode)) { log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode)); ucasemap_close(csm); return; } ucasemap_setBreakIterator(csm, sentenceIter, &errorCode); if(U_FAILURE(errorCode)) { log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode)); ubrk_close(sentenceIter); ucasemap_close(csm); return; } iter=ucasemap_getBreakIterator(csm); if(iter!=sentenceIter) { log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", iter, sentenceIter); } ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode); if(U_FAILURE(errorCode)) { log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode)); return; } /* Use the sentence break iterator with the option. Preflight first. 
*/ length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(titleSentNoLower) ) { log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; buffer[0]=0; length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); if( U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(titleSentNoLower) || 0!=u_memcmp(buffer, titleSentNoLower, length) || buffer[length]!=0 ) { log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); } /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */ { char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64]; int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength; errorCode=U_ZERO_ERROR; u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode); length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode); if( U_FAILURE(errorCode) || length!=utf8TitleSentNoLowerLength || 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) || utf8[length]!=0 ) { log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); } } ucasemap_close(csm); }
/*
 * Comparison callback handed to uprv_sortArray(): orders two rows by
 * comparing UGENCASE_UNFOLD_WIDTH UChars with u_memcmp().
 * The context argument is not used.
 */
static int32_t U_CALLCONV
compareUnfold(const void *context, const void *left, const void *right) {
    const UChar *leftRow=(const UChar *)left;
    const UChar *rightRow=(const UChar *)right;

    return u_memcmp(leftRow, rightRow, UGENCASE_UNFOLD_WIDTH);
}
/* {{{ grapheme_strrpos_utf16 - strrpos using utf16
 *
 * Converts haystack and needle to UTF-16 (case-folding both when
 * f_ignore_case is set), then walks the break iterator backwards from the
 * end of the haystack looking for the needle. A code-unit match only counts
 * when ubrk_isBoundary() reports a boundary right after it.
 *
 * The offset is only validated through grapheme_get_haystack_offset(); the
 * backward search itself always starts at the end of the haystack.
 *
 * Returns the value of grapheme_count_graphemes() for the match position,
 * or -1 when the needle is not found, a conversion fails, or the offset is
 * not contained in the string.
 */
int
grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
{
	UChar *uhaystack, *puhaystack, *uhaystack_end, *uneedle;
	int32_t uhaystack_len, uneedle_len;
	UErrorCode status;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	int ret_pos, pos;

	/* convert the strings to UTF-16. */
	uhaystack = NULL;
	uhaystack_len = 0;
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		/* the buffer may never have been allocated when the conversion failed */
		if ( uhaystack ) {
			efree( uhaystack );
		}
		return -1;
	}

	if ( f_ignore_case ) {
		/* NOTE(review): status is not checked after case folding - confirm this is intended */
		grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );
	}

	/* get a pointer to the haystack taking into account the offset */
	bi = NULL;
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );

	puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);

	if ( NULL == puhaystack ) {
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
		if ( uhaystack ) {
			efree( uhaystack );
		}
		ubrk_close (bi);
		return -1;
	}

	uneedle = NULL;
	uneedle_len = 0;
	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );

	if ( U_FAILURE( status ) ) {
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error messages. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		if ( uhaystack ) {
			efree( uhaystack );
		}
		/* same as above: the needle buffer may still be NULL */
		if ( uneedle ) {
			efree( uneedle );
		}
		ubrk_close (bi);
		return -1;
	}

	if ( f_ignore_case ) {
		grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
	}

	ret_pos = -1;   /* -1 represents 'not found' */

	/* back up until there's needle_len characters to compare */

	uhaystack_end = uhaystack + uhaystack_len;
	pos = ubrk_last(bi);
	puhaystack = uhaystack + pos;

	while ( uhaystack_end - puhaystack < uneedle_len ) {

		pos = ubrk_previous(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		puhaystack = uhaystack + pos;
	}

	/* is there enough haystack left to hold the needle? */
	if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
		/* not enough, not found */
		goto exit;
	}

	while ( UBRK_DONE != pos ) {

		if (!u_memcmp(uneedle, puhaystack, uneedle_len)) {  /* needle_len - 1 in zend memnstr? */

			/* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */

			if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {

				/* found it, get grapheme count offset */
				ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
				break;
			}

			/* set position back */
			ubrk_isBoundary(bi, pos);
		}

		pos = ubrk_previous(bi);
		/* only form the pointer for a real position, never for UBRK_DONE */
		if ( UBRK_DONE != pos ) {
			puhaystack = uhaystack + pos;
		}
	}

exit:
	if ( uhaystack ) {
		efree( uhaystack );
	}
	if ( uneedle ) {
		efree( uneedle );
	}
	ubrk_close (bi);

	return ret_pos;
}
static void TestUnicodeIDs() { UEnumeration *uenum; UTransliterator *utrans; const UChar *id, *id2; int32_t idLength, id2Length, count, count2; UErrorCode errorCode; errorCode=U_ZERO_ERROR; uenum=utrans_openIDs(&errorCode); if(U_FAILURE(errorCode)) { log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode)); return; } count=uenum_count(uenum, &errorCode); if(U_FAILURE(errorCode) || count<1) { log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode)); } count=0; for(;;) { id=uenum_unext(uenum, &idLength, &errorCode); if(U_FAILURE(errorCode)) { log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode)); break; } if(id==NULL) { break; } if(++count>10) { /* try to actually open only a few transliterators */ continue; } utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode); if(U_FAILURE(errorCode)) { log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode)); continue; } id2=utrans_getUnicodeID(utrans, &id2Length); if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) { log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength)); } utrans_close(utrans); } uenum_reset(uenum, &errorCode); if(U_FAILURE(errorCode) || count<1) { log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode)); } else { count2=uenum_count(uenum, &errorCode); if(U_FAILURE(errorCode) || count<1) { log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode)); } else if(count!=count2) { log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2); } } uenum_close(uenum); }
static void makeUnfoldData() { static const UChar iDot[2]= { 0x69, 0x307 }; UChar *p, *q; int32_t i, j, k; UErrorCode errorCode; /* * add a case folding that we missed because it's conditional: * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ addUnfolding(0x130, iDot, 2); /* sort the data */ errorCode=U_ZERO_ERROR; uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2, compareUnfold, NULL, FALSE, &errorCode); /* make unique-string rows by merging adjacent ones' code point columns */ /* make p point to row i-1 */ p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH; for(i=1; i<unfoldRows;) { if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) { /* concatenate code point columns */ q=p+UGENCASE_UNFOLD_STRING_WIDTH; for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {} for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) { q[j]=q[UGENCASE_UNFOLD_WIDTH+k]; } if(j>UGENCASE_UNFOLD_CP_WIDTH) { fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n", (long)j, UGENCASE_UNFOLD_CP_WIDTH); exit(U_BUFFER_OVERFLOW_ERROR); } /* move following rows up one */ --unfoldRows; unfoldTop-=UGENCASE_UNFOLD_WIDTH; u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH); } else { p+=UGENCASE_UNFOLD_WIDTH; ++i; } } unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows; if(beVerbose) { puts("unfold data:"); p=(UChar *)unfold; for(i=0; i<unfoldRows; ++i) { p+=UGENCASE_UNFOLD_WIDTH; printf("[%2d] %04x %04x %04x <- %04x %04x\n", (int)i, p[0], p[1], p[2], p[3], p[4]); } } }