// JNI entry point: clones the break iterator identified by `address` and
// returns the clone as an integer handle.
//
// ubrk_safeClone is given no caller-owned buffer (NULL), only the standard
// buffer-size hint. The resulting ICU status is handed to icu4jni_error()
// before the handle is returned.
//
// NOTE(review): the clone pointer is converted to uintptr_t and then narrowed
// to the jint return type; this presumably relies on pointers fitting in
// 32 bits -- confirm for 64-bit targets.
static jint cloneImpl(JNIEnv* env, jclass, jint address) {
    UBreakIterator* source = breakIterator(address);

    jint cloneBufferSize = U_BRK_SAFECLONE_BUFFERSIZE;
    UErrorCode icuStatus = U_ZERO_ERROR;
    UBreakIterator* clone = ubrk_safeClone(source, NULL, &cloneBufferSize, &icuStatus);

    icu4jni_error(env, icuStatus);
    return reinterpret_cast<uintptr_t>(clone);
}
/*
 * Clones the break iterator passed in as an opaque char* handle.
 *
 * obj: a UBreakIterator* disguised as char*.
 * Returns the clone (again as char*), or NULL when ICU reports a failure
 * status. No caller-owned buffer is supplied to ubrk_safeClone.
 */
static char* iterator_clone(char* obj)
{
    int32_t bufferSize = U_BRK_SAFECLONE_BUFFERSIZE;
    UErrorCode icuStatus = U_ZERO_ERROR;
    UBreakIterator* clone =
        ubrk_safeClone((UBreakIterator*) obj, NULL, &bufferSize, &icuStatus);

    return U_FAILURE(icuStatus) ? NULL : (char*) clone;
}
/**
 * Splits `text` into tokens with a clone of m_ubrk and adds one key per token
 * to `keysOut`.
 *
 * A token is the text between two consecutive break positions where the rule
 * status at the later position is not UBRK_WORD_NONE.
 *
 * @param text     input string; converted with MojDbTextUtils::strToUnicode
 * @param collator if non-null, each key is the collator's sort key for the
 *                 token; otherwise the token is converted back with
 *                 MojDbTextUtils::unicodeToStr and assigned to the key
 * @param keysOut  receives every generated key via put()
 * @return MojErrNone on success; error paths are taken through the
 *         MojErrCheck / MojUnicodeErrCheck macros
 */
MojErr MojDbTextTokenizer::tokenize(const MojString& text, MojDbTextCollator* collator, KeySet& keysOut) const
{
    LOG_TRACE("Entering function %s", __FUNCTION__);
    MojAssert(m_ubrk.get());

    // The break iterator operates on UChar text, so convert the input first.
    MojDbTextUtils::UnicodeVec utf16Text;
    MojErr err = MojDbTextUtils::strToUnicode(text, utf16Text);
    MojErrCheck(err);

    // Set the text on a clone rather than on m_ubrk itself (this method is const).
    // The storage array is declared before the iterator wrapper so that it
    // outlives the wrapper.
    MojByte cloneStorage[U_BRK_SAFECLONE_BUFFERSIZE];
    MojInt32 cloneStorageSize = sizeof(cloneStorage);
    UErrorCode icuStatus = U_ZERO_ERROR;
    IterPtr wordIter(ubrk_safeClone(m_ubrk.get(), cloneStorage, &cloneStorageSize, &icuStatus));
    MojUnicodeErrCheck(icuStatus);
    MojAssert(wordIter.get());
    ubrk_setText(wordIter.get(), utf16Text.begin(), (MojInt32) utf16Text.size(), &icuStatus);
    MojUnicodeErrCheck(icuStatus);

    // Walk the boundaries; tokBegin always trails the current boundary by one step.
    MojInt32 tokBegin = -1;
    for (MojInt32 tokEnd = ubrk_first(wordIter.get());
         tokEnd != UBRK_DONE;
         tokBegin = tokEnd, tokEnd = ubrk_next(wordIter.get())) {
        const UWordBreak ruleStatus = (UWordBreak) ubrk_getRuleStatus(wordIter.get());
        if (ruleStatus == UBRK_WORD_NONE) {
            continue;
        }
        MojAssert(tokBegin != -1);

        const UChar* tokChars = utf16Text.begin() + tokBegin;
        MojSize tokSize = (MojSize) (tokEnd - tokBegin);
        MojDbKey key;
        if (!collator) {
            // No collator: the key is the token converted back to a MojString.
            MojString tok;
            err = MojDbTextUtils::unicodeToStr(tokChars, tokSize, tok);
            MojErrCheck(err);
            err = key.assign(tok);
            MojErrCheck(err);
        } else {
            err = collator->sortKey(tokChars, tokSize, key);
            MojErrCheck(err);
        }
        err = keysOut.put(key);
        MojErrCheck(err);
    }
    return MojErrNone;
}
/* {{{ grapheme_get_break_iterator: get a clone of the global character break iterator
 *
 * The global iterator stored in INTL_G(grapheme_iterator) is opened on first
 * use (UBRK_CHARACTER, no locale, no text) and kept for later calls.
 *
 * stack_buffer: caller-provided storage handed to ubrk_safeClone together
 *               with a size of U_BRK_SAFECLONE_BUFFERSIZE.
 * status:       ICU error code, passed to both ubrk_open and ubrk_safeClone.
 * Returns whatever ubrk_safeClone returns for the global iterator.
 */
UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status )
{
    UBreakIterator *shared_iterator = INTL_G( grapheme_iterator );
    int32_t clone_buffer_size = U_BRK_SAFECLONE_BUFFERSIZE;

    if ( shared_iterator == NULL ) {
        /* First use: create the shared iterator and remember it. */
        shared_iterator = ubrk_open(
            UBRK_CHARACTER,
            NULL,   /* icu default locale - locale has no effect on this iterator */
            NULL,   /* text not set in global iterator */
            0,      /* text length = 0 */
            status);
        INTL_G(grapheme_iterator) = shared_iterator;
    }

    return ubrk_safeClone(shared_iterator, stack_buffer, &clone_buffer_size, status);
}
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; IcuTestErrorCode errorCode(*this, "TestCasingImpl"); LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. 
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); if(errorCode.isFailure()) { errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); errorCode.reset(); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); errln("expected \"" + output + "\" got \"" + result + "\"" ); } }
void StringCaseTest::TestCasingImpl(const UnicodeString &input, const UnicodeString &output, int32_t whichCase, void *iter, const char *localeID, uint32_t options) { // UnicodeString UnicodeString result; const char *name; Locale locale(localeID); result=input; switch(whichCase) { case TEST_LOWER: name="toLower"; result.toLower(locale); break; case TEST_UPPER: name="toUpper"; result.toUpper(locale); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="toTitle"; result.toTitle((BreakIterator *)iter, locale, options); break; #endif case TEST_FOLD: name="foldCase"; result.foldCase(options); break; default: name=""; break; // won't happen } if(result!=output) { errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); } #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE && options==0) { result=input; result.toTitle((BreakIterator *)iter, locale); if(result!=output) { errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); } } #endif // UTF-8 char utf8In[100], utf8Out[100]; int32_t utf8InLength, utf8OutLength, resultLength; UChar *buffer; UCaseMap *csm; UErrorCode errorCode; errorCode=U_ZERO_ERROR; csm=ucasemap_open(localeID, options, &errorCode); #if !UCONFIG_NO_BREAK_ITERATION if(iter!=NULL) { // Clone the break iterator so that the UCaseMap can safely adopt it. int32_t size=1; // Not 0 because that only gives preflighting. 
UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, &errorCode); ucasemap_setBreakIterator(csm, clone, &errorCode); } #endif u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), &errorCode); switch(whichCase) { case TEST_LOWER: name="ucasemap_utf8ToLower"; utf8OutLength=ucasemap_utf8ToLower(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; case TEST_UPPER: name="ucasemap_utf8ToUpper"; utf8OutLength=ucasemap_utf8ToUpper(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #if !UCONFIG_NO_BREAK_ITERATION case TEST_TITLE: name="ucasemap_utf8ToTitle"; utf8OutLength=ucasemap_utf8ToTitle(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; #endif case TEST_FOLD: name="ucasemap_utf8FoldCase"; utf8OutLength=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), utf8In, utf8InLength, &errorCode); break; default: name=""; utf8OutLength=0; break; // won't happen } buffer=result.getBuffer(utf8OutLength); u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? resultLength : 0); if(U_FAILURE(errorCode)) { errln("error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); } else if(result!=output) { errln("error: %s() got a wrong result for a test case from casing.res", name); } ucasemap_close(csm); }
/*
 * Exercises ubrk_safeClone() against an en_US and a th_TH word break
 * iterator: NULL/error status handling, NULL and zero buffer-size handling
 * ("preflighting"), too-small and NULL buffers, a mis-aligned buffer, a NULL
 * source iterator, and finally a first()/next() smoke test on a real clone.
 *
 * The checks inside the loop share `status` and `bufferSize`, so their order
 * matters; each check resets the state the next one expects.
 *
 * NOTE(review): the two ubrk_open() calls fill someIterators[0] and [1] only,
 * so this assumes CLONETEST_ITERATOR_COUNT is 2 -- confirm against its definition.
 */
static void TestBreakIteratorSafeClone(void)
{
    UChar text[51];     /* Keep this odd to test for 64-bit memory alignment */
                        /*  NOTE:  This doesn't reliably force mis-alignment of following items. */
    uint8_t buffer [CLONETEST_ITERATOR_COUNT] [U_BRK_SAFECLONE_BUFFERSIZE];
    int32_t bufferSize = U_BRK_SAFECLONE_BUFFERSIZE;

    UBreakIterator * someIterators [CLONETEST_ITERATOR_COUNT];
    UBreakIterator * someClonedIterators [CLONETEST_ITERATOR_COUNT];

    UBreakIterator * brk;
    UErrorCode status = U_ZERO_ERROR;
    int32_t start,pos;
    int32_t i;

    /*Testing ubrk_safeClone */

    /* Note:  the adjacent "" are concatenating strings, not adding a \" to the
       string, which is probably what whoever wrote this intended.  Don't fix,
       because it would throw off the hard coded break positions in the following
       tests. */
    u_uastrcpy(text, "He's from Africa. ""Mr. Livingston, I presume?"" Yeah");

    /* US & Thai - rule-based & dictionary based */
    someIterators[0] = ubrk_open(UBRK_WORD, "en_US", text, u_strlen(text), &status);
    if(!someIterators[0] || U_FAILURE(status)) {
        log_data_err("Couldn't open en_US word break iterator - %s\n", u_errorName(status));
        return;
    }

    someIterators[1] = ubrk_open(UBRK_WORD, "th_TH", text, u_strlen(text), &status);
    if(!someIterators[1] || U_FAILURE(status)) {
        log_data_err("Couldn't open th_TH word break iterator - %s\n", u_errorName(status));
        /* Do not leak the en_US iterator (or a partially usable th_TH one) on this early exit. */
        ubrk_close(someIterators[0]);
        if (someIterators[1]) {
            ubrk_close(someIterators[1]);
        }
        return;
    }

    /* test each type of iterator */
    for (i = 0; i < CLONETEST_ITERATOR_COUNT; i++)
    {

        /* Check the various error & informational states */

        /* Null status - just returns NULL */
        if (0 != ubrk_safeClone(someIterators[i], buffer[i], &bufferSize, 0))
        {
            log_err("FAIL: Cloned Iterator failed to deal correctly with null status\n");
        }
        /* error status - should return 0 & keep error the same */
        status = U_MEMORY_ALLOCATION_ERROR;
        if (0 != ubrk_safeClone(someIterators[i], buffer[i], &bufferSize, &status) || status != U_MEMORY_ALLOCATION_ERROR)
        {
            log_err("FAIL: Cloned Iterator failed to deal correctly with incoming error status\n");
        }
        status = U_ZERO_ERROR;

        /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
        if (0 != ubrk_safeClone(someIterators[i], buffer[i], 0, &status) || status != U_ILLEGAL_ARGUMENT_ERROR)
        {
            log_err("FAIL: Cloned Iterator failed to deal correctly with null bufferSize pointer\n");
        }
        status = U_ZERO_ERROR;

        /* buffer size pointer is 0 - fill in pbufferSize with a size */
        bufferSize = 0;
        if (0 != ubrk_safeClone(someIterators[i], buffer[i], &bufferSize, &status) || U_FAILURE(status) || bufferSize <= 0)
        {
            log_err("FAIL: Cloned Iterator failed a sizing request ('preflighting')\n");
        }
        /* Verify our define is large enough  */
        if (U_BRK_SAFECLONE_BUFFERSIZE < bufferSize)
        {
            log_err("FAIL: Pre-calculated buffer size is too small\n");
        }
        /* Verify we can use this run-time calculated size */
        if (0 == (brk = ubrk_safeClone(someIterators[i], buffer[i], &bufferSize, &status)) || U_FAILURE(status))
        {
            log_err("FAIL: Iterator can't be cloned with run-time size\n");
        }
        if (brk)
            ubrk_close(brk);
        /* size one byte too small - should allocate & let us know */
        --bufferSize;
        if (0 == (brk = ubrk_safeClone(someIterators[i], 0, &bufferSize, &status)) || status != U_SAFECLONE_ALLOCATED_WARNING)
        {
            log_err("FAIL: Cloned Iterator failed to deal correctly with too-small buffer size\n");
        }
        if (brk)
            ubrk_close(brk);
        status = U_ZERO_ERROR;
        bufferSize = U_BRK_SAFECLONE_BUFFERSIZE;

        /* Null buffer pointer - return Iterator & set error to U_SAFECLONE_ALLOCATED_WARNING */
        if (0 == (brk = ubrk_safeClone(someIterators[i], 0, &bufferSize, &status)) || status != U_SAFECLONE_ALLOCATED_WARNING)
        {
            log_err("FAIL: Cloned Iterator failed to deal correctly with null buffer pointer\n");
        }
        if (brk)
            ubrk_close(brk);
        status = U_ZERO_ERROR;

        /* Mis-aligned buffer pointer. */
        {
            char  stackBuf[U_BRK_SAFECLONE_BUFFERSIZE+sizeof(void *)];
            void  *p;
            int32_t offset;

            brk = ubrk_safeClone(someIterators[i], &stackBuf[1], &bufferSize, &status);
            if (U_FAILURE(status) || brk == 0) {
                log_err("FAIL: Cloned Iterator failed with misaligned buffer pointer\n");
            }
            if (status == U_SAFECLONE_ALLOCATED_WARNING) {
                log_err("FAIL: Cloned Iterator allocated when using a mis-aligned buffer.\n");
            }
            /* Alignment is judged by the distance between the clone and the
               address of the local pointer variable p. */
            offset = (int32_t)((char *)&p-(char*)brk);
            if (offset < 0) {
                offset = -offset;
            }
            if (offset % sizeof(void *) != 0) {
                log_err("FAIL: Cloned Iterator failed to align correctly with misaligned buffer pointer\n");
            }
            if (brk)
                ubrk_close(brk);
        }


        /* Null Iterator - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
        if (0 != ubrk_safeClone(0, buffer[i], &bufferSize, &status) || status != U_ILLEGAL_ARGUMENT_ERROR)
        {
            log_err("FAIL: Cloned Iterator failed to deal correctly with null Iterator pointer\n");
        }
        status = U_ZERO_ERROR;

        /* Do these cloned Iterators work at all - make a first & next call */
        bufferSize = U_BRK_SAFECLONE_BUFFERSIZE;
        someClonedIterators[i] = ubrk_safeClone(someIterators[i], buffer[i], &bufferSize, &status);

        start = ubrk_first(someClonedIterators[i]);
        if(start!=0)
            log_err("error ubrk_start(clone) did not return 0\n");
        pos=ubrk_next(someClonedIterators[i]);
        if(pos!=4)
            log_err("error ubrk_next(clone) did not return 4\n");

        ubrk_close(someClonedIterators[i]);
        ubrk_close(someIterators[i]);
    }
}
/*
 * Plain forwarder to ubrk_safeClone(): every argument is passed through
 * unchanged and the resulting iterator pointer is returned as-is.
 *
 * NOTE(review): presumably exists to expose a stable, un-renamed symbol to
 * foreign callers -- confirm against the binding that imports it.
 */
UBreakIterator *
__hs_ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer,
                    int32_t *pBufferSize, UErrorCode *status)
{
    UBreakIterator *clone = ubrk_safeClone(bi, stackBuffer, pBufferSize, status);
    return clone;
}