int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) { UBreakIterator* it = wordBreakIterator(chars, len); if (forward) { position = ubrk_following(it, position); while (position != UBRK_DONE) { // We stop searching when the character preceeding the break // is alphanumeric. if (position < len && u_isalnum(chars[position - 1])) return position; position = ubrk_following(it, position); } return len; } else { position = ubrk_preceding(it, position); while (position != UBRK_DONE) { // We stop searching when the character following the break // is alphanumeric. if (position > 0 && u_isalnum(chars[position])) return position; position = ubrk_preceding(it, position); } return 0; } }
int KWQFindNextWordFromIndex(const QChar *chars, int len, int position, bool forward) { int pos = 0; UErrorCode status = U_ZERO_ERROR; UBreakIterator *boundary = ubrk_open(UBRK_WORD, (const char*)currentTextBreakLocaleID().c_str(), const_cast<UChar *>(reinterpret_cast<const UChar *>(chars)), len, &status); if ( boundary && U_SUCCESS(status) ) { if (forward) { pos = ubrk_following(boundary, position); if (pos == UBRK_DONE) pos = len; } else { pos = ubrk_preceding(boundary, position); if (pos == UBRK_DONE) pos = 0; } ubrk_close(boundary); } return pos; }
void KWQFindSentenceBoundary(const QChar *chars, int len, int position, int *start, int *end) { int startPos = 0; int endPos = 0; UErrorCode status = U_ZERO_ERROR; UBreakIterator *boundary = ubrk_open(UBRK_SENTENCE, (const char*)currentTextBreakLocaleID().c_str(), const_cast<UChar *>(reinterpret_cast<const UChar *>(chars)), len, &status); if ( boundary && U_SUCCESS(status) ) { startPos = ubrk_preceding(boundary, position); if (startPos == UBRK_DONE) { startPos = 0; } endPos = ubrk_following(boundary, startPos); if (endPos == UBRK_DONE) endPos = len; ubrk_close(boundary); } *start = startPos; *end = endPos; }
// JNI entry point: resolves |address| to a break iterator via breakIterator()
// and returns the result of ubrk_preceding() for |offset|.
static jint precedingImpl(JNIEnv*, jclass, jint address, jint offset)
{
    const jint boundary = ubrk_preceding(breakIterator(address), offset);
    return boundary;
}
StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) :UPerfTest(argc,argv,status){ int32_t start, end; #ifdef TEST_BOYER_MOORE_SEARCH bms = NULL; #else srch = NULL; #endif pttrn = NULL; if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){ fprintf(stderr,gUsageString, "strsrchperf"); return; } /* Get the Text */ src = getBuffer(srcLen, status); #if 0 /* Get a word to find. Do this by selecting a random word with a word breakiterator. */ UBreakIterator* brk = ubrk_open(UBRK_WORD, locale, src, srcLen, &status); if(U_FAILURE(status)){ fprintf(stderr, "FAILED to create pattern for searching. Error: %s\n", u_errorName(status)); return; } start = ubrk_preceding(brk, 1000); end = ubrk_following(brk, start); pttrnLen = end - start; UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen)); for (int i = 0; i < pttrnLen; i++) { temp[i] = src[start++]; } pttrn = temp; /* store word in pttrn */ ubrk_close(brk); #else /* The first line of the file contains the pattern */ start = 0; for(end = start; ; end += 1) { UChar ch = src[end]; if (ch == 0x000A || ch == 0x000D || ch == 0x2028) { break; } } pttrnLen = end - start; UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen)); for (int i = 0; i < pttrnLen; i++) { temp[i] = src[start++]; } pttrn = temp; /* store word in pttrn */ #endif #ifdef TEST_BOYER_MOORE_SEARCH UnicodeString patternString(pttrn, pttrnLen); UCollator *coll = ucol_open(locale, &status); CollData *data = CollData::open(coll, status); targetString = new UnicodeString(src, srcLen); bms = new BoyerMooreSearch(data, patternString, targetString, status); #else /* Create the StringSearch object to be use in performance test. */ srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status); #endif if(U_FAILURE(status)){ fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status)); return; } }
// Reinterprets the opaque TextBreakIterator handle as an ICU UBreakIterator
// and returns ubrk_preceding() for |pos|.
int textBreakPreceding(TextBreakIterator* iterator, int pos)
{
    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
    return ubrk_preceding(icuIterator, pos);
}
static void TestBreakIteratorCAPI() { UErrorCode status = U_ZERO_ERROR; UBreakIterator *word, *sentence, *line, *character, *b, *bogus; int32_t start,pos,end,to; int32_t i; int32_t count = 0; UChar text[50]; /* Note: the adjacent "" are concatenating strings, not adding a \" to the string, which is probably what whoever wrote this intended. Don't fix, because it would throw off the hard coded break positions in the following tests. */ u_uastrcpy(text, "He's from Africa. ""Mr. Livingston, I presume?"" Yeah"); /*test ubrk_open()*/ log_verbose("\nTesting BreakIterator open functions\n"); /* Use french for fun */ word = ubrk_open(UBRK_WORD, "en_US", text, u_strlen(text), &status); if(status == U_FILE_ACCESS_ERROR) { log_data_err("Check your data - it doesn't seem to be around\n"); return; } else if(U_FAILURE(status)){ log_err_status(status, "FAIL: Error in ubrk_open() for word breakiterator: %s\n", myErrorName(status)); } else{ log_verbose("PASS: Successfully opened word breakiterator\n"); } sentence = ubrk_open(UBRK_SENTENCE, "en_US", text, u_strlen(text), &status); if(U_FAILURE(status)){ log_err_status(status, "FAIL: Error in ubrk_open() for sentence breakiterator: %s\n", myErrorName(status)); return; } else{ log_verbose("PASS: Successfully opened sentence breakiterator\n"); } line = ubrk_open(UBRK_LINE, "en_US", text, u_strlen(text), &status); if(U_FAILURE(status)){ log_err("FAIL: Error in ubrk_open() for line breakiterator: %s\n", myErrorName(status)); return; } else{ log_verbose("PASS: Successfully opened line breakiterator\n"); } character = ubrk_open(UBRK_CHARACTER, "en_US", text, u_strlen(text), &status); if(U_FAILURE(status)){ log_err("FAIL: Error in ubrk_open() for character breakiterator: %s\n", myErrorName(status)); return; } else{ log_verbose("PASS: Successfully opened character breakiterator\n"); } /*trying to open an illegal iterator*/ bogus = ubrk_open((UBreakIteratorType)5, "en_US", text, u_strlen(text), &status); if(U_SUCCESS(status)){ log_err("FAIL: 
Error in ubrk_open() for BOGUS breakiterator. Expected U_ILLEGAL_ARGUMENT_ERROR\n"); } if(U_FAILURE(status)){ if(status != U_ILLEGAL_ARGUMENT_ERROR){ log_err("FAIL: Error in ubrk_open() for BOGUS breakiterator. Expected U_ILLEGAL_ARGUMENT_ERROR\n Got %s\n", myErrorName(status)); } } status=U_ZERO_ERROR; /* ======= Test ubrk_countAvialable() and ubrk_getAvialable() */ log_verbose("\nTesting ubrk_countAvailable() and ubrk_getAvailable()\n"); count=ubrk_countAvailable(); /* use something sensible w/o hardcoding the count */ if(count < 0){ log_err("FAIL: Error in ubrk_countAvialable() returned %d\n", count); } else{ log_verbose("PASS: ubrk_countAvialable() successful returned %d\n", count); } for(i=0;i<count;i++) { log_verbose("%s\n", ubrk_getAvailable(i)); if (ubrk_getAvailable(i) == 0) log_err("No locale for which breakiterator is applicable\n"); else log_verbose("A locale %s for which breakiterator is applicable\n",ubrk_getAvailable(i)); } /*========Test ubrk_first(), ubrk_last()...... and other functions*/ log_verbose("\nTesting the functions for word\n"); start = ubrk_first(word); if(start!=0) log_err("error ubrk_start(word) did not return 0\n"); log_verbose("first (word = %d\n", (int32_t)start); pos=ubrk_next(word); if(pos!=4) log_err("error ubrk_next(word) did not return 4\n"); log_verbose("next (word = %d\n", (int32_t)pos); pos=ubrk_following(word, 4); if(pos!=5) log_err("error ubrl_following(word,4) did not return 6\n"); log_verbose("next (word = %d\n", (int32_t)pos); end=ubrk_last(word); if(end!=49) log_err("error ubrk_last(word) did not return 49\n"); log_verbose("last (word = %d\n", (int32_t)end); pos=ubrk_previous(word); log_verbose("%d %d\n", end, pos); pos=ubrk_previous(word); log_verbose("%d \n", pos); if (ubrk_isBoundary(word, 2) != FALSE) { log_err("error ubrk_isBoundary(word, 2) did not return FALSE\n"); } pos=ubrk_current(word); if (pos != 4) { log_err("error ubrk_current() != 4 after ubrk_isBoundary(word, 2)\n"); } if (ubrk_isBoundary(word, 4) != 
TRUE) { log_err("error ubrk_isBoundary(word, 4) did not return TRUE\n"); } log_verbose("\nTesting the functions for character\n"); ubrk_first(character); pos = ubrk_following(character, 5); if(pos!=6) log_err("error ubrk_following(character,5) did not return 6\n"); log_verbose("Following (character,5) = %d\n", (int32_t)pos); pos=ubrk_following(character, 18); if(pos!=19) log_err("error ubrk_following(character,18) did not return 19\n"); log_verbose("Followingcharacter,18) = %d\n", (int32_t)pos); pos=ubrk_preceding(character, 22); if(pos!=21) log_err("error ubrk_preceding(character,22) did not return 21\n"); log_verbose("preceding(character,22) = %d\n", (int32_t)pos); log_verbose("\nTesting the functions for line\n"); pos=ubrk_first(line); if(pos != 0) log_err("error ubrk_first(line) returned %d, expected 0\n", (int32_t)pos); pos = ubrk_next(line); pos=ubrk_following(line, 18); if(pos!=22) log_err("error ubrk_following(line) did not return 22\n"); log_verbose("following (line) = %d\n", (int32_t)pos); log_verbose("\nTesting the functions for sentence\n"); ubrk_first(sentence); pos = ubrk_current(sentence); log_verbose("Current(sentence) = %d\n", (int32_t)pos); pos = ubrk_last(sentence); if(pos!=49) log_err("error ubrk_last for sentence did not return 49\n"); log_verbose("Last (sentence) = %d\n", (int32_t)pos); ubrk_first(sentence); to = ubrk_following( sentence, 0 ); if (to == 0) log_err("ubrk_following returned 0\n"); to = ubrk_preceding( sentence, to ); if (to != 0) log_err("ubrk_preceding didn't return 0\n"); if (ubrk_first(sentence)!=ubrk_current(sentence)) { log_err("error in ubrk_first() or ubrk_current()\n"); } /*---- */ /*Testing ubrk_open and ubrk_close()*/ log_verbose("\nTesting open and close for us locale\n"); b = ubrk_open(UBRK_WORD, "fr_FR", text, u_strlen(text), &status); if (U_FAILURE(status)) { log_err("ubrk_open for word returned NULL: %s\n", myErrorName(status)); } ubrk_close(b); /* Test setText and setUText */ { UChar s1[] = {0x41, 0x42, 0x20, 0}; 
UChar s2[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0}; UText *ut = NULL; UBreakIterator *bb; int j; log_verbose("\nTesting ubrk_setText() and ubrk_setUText()\n"); status = U_ZERO_ERROR; bb = ubrk_open(UBRK_WORD, "en_US", NULL, 0, &status); TEST_ASSERT_SUCCESS(status); ubrk_setText(bb, s1, -1, &status); TEST_ASSERT_SUCCESS(status); ubrk_first(bb); j = ubrk_next(bb); TEST_ASSERT(j == 2); ut = utext_openUChars(ut, s2, -1, &status); ubrk_setUText(bb, ut, &status); TEST_ASSERT_SUCCESS(status); j = ubrk_next(bb); TEST_ASSERT(j == 5); ubrk_close(bb); utext_close(ut); } ubrk_close(word); ubrk_close(sentence); ubrk_close(line); ubrk_close(character); }
/*
 * Version-suffixed export that forwards both arguments unchanged to
 * ubrk_preceding() and returns its result.
 *
 *   imp: common/ubrk.cpp
 *   hdr: common/unicode/ubrk.h
 *   @stable ICU 2.0
 *   #if !UCONFIG_NO_BREAK_ITERATION
 *   (don't actually conditionalize this, if the underlying library is not
 *    built with break iteration, we want to fail at build time, not runtime)
 */
U_CAPI int32_t U_EXPORT2
ubrk_preceding_4_0(UBreakIterator *bi, int32_t offset)
{
    const int32_t boundary = ubrk_preceding(bi, offset);
    return boundary;
}
// Shim over ICU's ubrk_preceding(): converts the stdlib-side iterator handle
// with ptr_cast<UBreakIterator>() and returns the ICU result for |offset|.
int32_t
swift::__swift_stdlib_ubrk_preceding(swift::__swift_stdlib_UBreakIterator *bi,
                                     int32_t offset)
{
    const int32_t boundary =
        ubrk_preceding(ptr_cast<UBreakIterator>(bi), offset);
    return boundary;
}
static void TestBreakIteratorSuppressions(void) { const TestBISuppressionsItem * itemPtr; for (itemPtr = testBISuppressionsItems; itemPtr->locale != NULL; itemPtr++) { UChar textU[kTextULenMax]; int32_t textULen = u_unescape(itemPtr->text, textU, kTextULenMax); UErrorCode status = U_ZERO_ERROR; UBreakIterator *bi = ubrk_open(UBRK_SENTENCE, itemPtr->locale, textU, textULen, &status); log_verbose("#%d: %s\n", (itemPtr-testBISuppressionsItems), itemPtr->locale); if (U_SUCCESS(status)) { int32_t offset, start; const int32_t * expOffsetPtr; const int32_t * expOffsetStart; expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets; ubrk_first(bi); for (; (offset = ubrk_next(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_next loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset); } } if (offset != UBRK_DONE || *expOffsetPtr >= 0) { log_err("FAIL: ubrk_next loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr); } expOffsetStart = expOffsetPtr = itemPtr->expFwdOffsets; start = ubrk_first(bi) + 1; for (; (offset = ubrk_following(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_following(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset); } start = *expOffsetPtr + 1; } if (offset != UBRK_DONE || *expOffsetPtr >= 0) { log_err("FAIL: ubrk_following(%d) loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr); } expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets; offset = ubrk_last(bi); log_verbose("___ @%d ubrk_last\n", offset); if(offset == 0) { log_err("FAIL: ubrk_last loc \"%s\" unexpected %d\n", itemPtr->locale, offset); } for (; (offset = ubrk_previous(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_previous loc \"%s\", 
expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset); } else { log_verbose("[%d] @%d ubrk_previous()\n", (expOffsetPtr - expOffsetStart), offset); } } if (offset != UBRK_DONE || *expOffsetPtr >= 0) { log_err("FAIL: ubrk_previous loc \"%s\", expected UBRK_DONE & expOffset[%d] -1, got %d and %d\n", itemPtr->locale, expOffsetPtr - expOffsetStart, offset, *expOffsetPtr); } expOffsetStart = expOffsetPtr = itemPtr->expRevOffsets; start = ubrk_last(bi) - 1; for (; (offset = ubrk_preceding(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_preceding(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset); } start = *expOffsetPtr - 1; } if (start >=0 && (offset != UBRK_DONE || *expOffsetPtr >= 0)) { log_err("FAIL: ubrk_preceding loc(%d) \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr); } ubrk_close(bi); } else { log_data_err("FAIL: ubrk_open(UBRK_SENTENCE, \"%s\", ...) status %s (Are you missing data?)\n", itemPtr->locale, u_errorName(status)); } } }
/* Forwards both arguments to ubrk_preceding() and returns its result. */
int32_t __hs_ubrk_preceding(UBreakIterator *bi, int32_t offset)
{
    const int32_t boundary = ubrk_preceding(bi, offset);
    return boundary;
}