static void TestBreakIteratorRuleError() { /* * TestBreakIteratorRuleError - Try to create a BI from rules with syntax errors, * check that the error is reported correctly. */ char rules[] = " # This is a rule comment on line 1\n" "[:L:]; # this rule is OK.\n" "abcdefg); # Error, mismatched parens\n"; UChar *uRules; void *freeHook = NULL; UErrorCode status = U_ZERO_ERROR; UParseError parseErr; UBreakIterator *bi; uRules = toUChar(rules, &freeHook); bi = ubrk_openRules(uRules, -1, /* The rules */ NULL, -1, /* The text to be iterated over. */ &parseErr, &status); if (U_SUCCESS(status)) { log_err("FAIL: construction of break iterator succeeded when it should have failed.\n"); ubrk_close(bi); } else { if (parseErr.line != 3 || parseErr.offset != 8) { log_data_err("FAIL: incorrect error position reported. Got line %d, char %d, expected line 3, char 7 (Are you missing data?)\n", parseErr.line, parseErr.offset); } } freeToUCharStrings(&freeHook); }
// Builds an ICU rule-based break iterator from the given 8-bit rule text.
// No text is attached to the iterator here. In assertion-enabled builds a
// failure to open the iterator trips ASSERT_WITH_MESSAGE; the value returned
// by ubrk_openRules is handed back to the caller either way.
static TextBreakIterator* initializeIteratorWithRules(const char* breakRules)
{
    String ruleSource(breakRules);
    UErrorCode openStatus = U_ZERO_ERROR;
    UParseError parseError;

    TextBreakIterator* opened = reinterpret_cast<TextBreakIterator*>(
        ubrk_openRules(ruleSource.deprecatedCharacters(), ruleSource.length(), 0, 0, &parseError, &openStatus));

    ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
    return opened;
}
// Builds an ICU rule-based break iterator from the given 8-bit rule text.
// The bytes are viewed as LChar and upconverted before being passed to
// ubrk_openRules; no text is attached to the iterator here. In
// assertion-enabled builds a failure to open trips ASSERT_WITH_MESSAGE.
static TextBreakIterator* initializeIteratorWithRules(const char* breakRules)
{
    unsigned ruleLength = strlen(breakRules);
    StringView ruleView(reinterpret_cast<const LChar*>(breakRules), ruleLength);
    auto wideRules = ruleView.upconvertedCharacters();

    UErrorCode openStatus = U_ZERO_ERROR;
    UParseError parseError;
    TextBreakIterator* opened = reinterpret_cast<TextBreakIterator*>(
        ubrk_openRules(wideRules, ruleLength, 0, 0, &parseError, &openStatus));

    ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
    return opened;
}
/* * TestsBreakIteratorStatusVals() Test the ubrk_getRuleStatusVec() funciton */ static void TestBreakIteratorStatusVec() { #define RULE_STRING_LENGTH 200 UChar rules[RULE_STRING_LENGTH]; #define TEST_STRING_LENGTH 25 UChar testString[TEST_STRING_LENGTH]; UBreakIterator *bi = NULL; int32_t pos = 0; int32_t vals[10]; int32_t numVals; UErrorCode status = U_ZERO_ERROR; u_uastrncpy(rules, "[A-N]{100}; \n" "[a-w]{200}; \n" "[\\p{L}]{300}; \n" "[\\p{N}]{400}; \n" "[0-5]{500}; \n" "!.*;\n", RULE_STRING_LENGTH); u_uastrncpy(testString, "ABC", TEST_STRING_LENGTH); bi = ubrk_openRules(rules, -1, testString, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT(bi != NULL); /* The TEST_ASSERT above should change too... */ if (bi != NULL) { pos = ubrk_next(bi); TEST_ASSERT(pos == 1); memset(vals, -1, sizeof(vals)); numVals = ubrk_getRuleStatusVec(bi, vals, 10, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT(numVals == 2); TEST_ASSERT(vals[0] == 100); TEST_ASSERT(vals[1] == 300); TEST_ASSERT(vals[2] == -1); numVals = ubrk_getRuleStatusVec(bi, vals, 0, &status); TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); TEST_ASSERT(numVals == 2); } ubrk_close(bi); }
/*
 * testOpenRules - open a break iterator from rules supplied as a char * string.
 *
 * Converts the rules with toUChar(), opens the iterator with no text attached,
 * and logs a data error if ICU reports a failure. Returns the iterator, or
 * 0 when opening failed.
 */
static UBreakIterator * testOpenRules(char *rules) {
    UErrorCode      errorCode   = U_ZERO_ERROR;
    void           *cleanupHook = NULL;
    UParseError     parseError;
    UChar          *rulesU      = toUChar(rules, &cleanupHook);
    UBreakIterator *opened      = ubrk_openRules(rulesU, -1,   /* The rules */
                                                 NULL, -1,     /* The text to be iterated over. */
                                                 &parseError, &errorCode);

    if (U_FAILURE(errorCode)) {
        log_data_err("FAIL: ubrk_openRules: ICU Error \"%s\" (Are you missing data?)\n",
                     u_errorName(errorCode));
        opened = 0;
    }

    freeToUCharStrings(&cleanupHook);
    return opened;
}
// Lazily opens a rule-based break iterator (once, tracked by createdIterator)
// and points it at the given text.
//
// Returns 0 when there is no text, when the iterator is unavailable, or when
// ubrk_setText reports a failure; otherwise returns the iterator.
// createdIterator is set after the first open attempt regardless of whether
// that attempt succeeded.
static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator, const char* breakRules, const UChar* string, int length)
{
    if (!string)
        return 0;

    if (!createdIterator) {
        String ruleSource(breakRules);
        UErrorCode openStatus = U_ZERO_ERROR;
        UParseError parseError;
        UBreakIterator* opened = ubrk_openRules(ruleSource.characters(), ruleSource.length(), 0, 0, &parseError, &openStatus);
        iterator = reinterpret_cast<TextBreakIterator*>(opened);
        createdIterator = true;
        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
    }

    if (!iterator)
        return 0;

    UErrorCode setTextStatus = U_ZERO_ERROR;
    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus);
    return U_FAILURE(setTextStatus) ? 0 : iterator;
}
void StringCaseTest::TestCasing() { UErrorCode status = U_ZERO_ERROR; #if !UCONFIG_NO_BREAK_ITERATION LocalUBreakIteratorPointer iter; #endif char cLocaleID[100]; UnicodeString locale, input, output, optionsString, result; uint32_t options; int32_t whichCase, type; LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); if(U_SUCCESS(status)) { for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { #if UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE) { continue; } #endif LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); if(U_FAILURE(status)) { errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); break; } const DataMap *myCase = NULL; while(casingTest->nextCase(myCase, status)) { input = myCase->getString("Input", status); output = myCase->getString("Output", status); if(whichCase!=TEST_FOLD) { locale = myCase->getString("Locale", status); } locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); #if !UCONFIG_NO_BREAK_ITERATION if(whichCase==TEST_TITLE) { type = myCase->getInt("Type", status); if(type>=0) { iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); } else if(type==-2) { // Open a trivial break iterator that only delivers { 0, length } // or even just { 0 } as boundaries. 
static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" UParseError parseError; iter.adoptInstead(ubrk_openRules(rules, LENGTHOF(rules), NULL, 0, &parseError, &status)); } } #endif options = 0; if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { optionsString = myCase->getString("Options", status); if(optionsString.indexOf((UChar)0x54)>=0) { // T options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; } if(optionsString.indexOf((UChar)0x4c)>=0) { // L options|=U_TITLECASE_NO_LOWERCASE; } if(optionsString.indexOf((UChar)0x41)>=0) { // A options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; } } if(U_FAILURE(status)) { dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); status = U_ZERO_ERROR; } else { #if UCONFIG_NO_BREAK_ITERATION LocalPointer<UMemory> iter; #endif TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); } #if !UCONFIG_NO_BREAK_ITERATION iter.adoptInstead(NULL); #endif } } } #if !UCONFIG_NO_BREAK_ITERATION // more tests for API coverage status=U_ZERO_ERROR; input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); (result=input).toTitle(NULL); if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { dataerrln("UnicodeString::toTitle(NULL) failed."); } #endif }
/*
 * get_rules - read a break-rule source file and compile it into a break iterator.
 *
 * ruleFileName  path of the rule source file.
 * status        ICU error code, passed BY VALUE: errors detected here are not
 *               visible to the caller through this parameter.
 *
 * Returns the iterator produced by ubrk_openRules(). Any failure (cannot open
 * or read the file, allocation failure, ICU error) prints a message to stderr
 * and terminates the process.
 *
 * NOTE(review): the two buffers obtained from omalloc() are not released here,
 * matching the original code; the matching deallocator is not visible from
 * this function.
 */
UBreakIterator* get_rules(const char *ruleFileName, UErrorCode status) {
    /* Read in the rule source file */
    long            result;
    long            ruleFileSize;
    FILE           *file;
    OFILE          *ufile;
    UBreakIterator *return_me;

    file = fopen(ruleFileName, "rb");
    if (file == 0) {
        fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName);
        exit(-1);
    }

    fseek(file, 0, SEEK_END);
    ruleFileSize = ftell(file);
    fseek(file, 0, SEEK_SET);
    if (ruleFileSize < 0) {
        /* ftell() failed; a negative size must not reach the allocator. */
        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
        exit(-1);
    }

    char *ruleBufferC = (char *) omalloc (ruleFileSize + 1);
    if (ruleBufferC == NULL) {
        fprintf(stderr, "Out of memory reading file \"%s\"\n", ruleFileName);
        exit(-1);
    }
    ruleBufferC[ruleFileSize] = '\0';

    result = (long)fread(ruleBufferC, 1, ruleFileSize, file);
    if (result != ruleFileSize) {
        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
        exit (-1);
    }

    /* Look for a Unicode Signature (BOM) on the rule file */
    int32_t      signatureLength;
    const char  *ruleSourceC = ruleBufferC;
    const char  *encoding = ucnv_detectUnicodeSignature(
                                ruleSourceC, ruleFileSize, &signatureLength, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "\nCan not initialize ICU. status = %s\n",
                u_errorName(status));
        exit(1);
    }
    if (encoding != NULL) {
        ruleSourceC  += signatureLength;
        ruleFileSize -= signatureLength;
    }

    /* Open a converter for the detected encoding. It is opened and closed
       without converting anything; the file is decoded through ustdio below. */
    UConverter *conv;
    conv = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
        exit(1);
    }

    ufile = u_finit(file, NULL, NULL);
    if (ufile == NULL) {
        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
        exit(1);
    }
    u_frewind(ufile);

    /* One extra UChar (not one extra byte) so that the terminator written at
       index charsRead always fits, even when charsRead == ruleFileSize. */
    UChar *ruleSourceU = (UChar *) omalloc ((ruleFileSize + 1) * sizeof(UChar));
    if (ruleSourceU == NULL) {
        fprintf(stderr, "Out of memory reading file \"%s\"\n", ruleFileName);
        exit(-1);
    }
    int32_t charsRead = u_file_read(ruleSourceU, (int32_t)ruleFileSize, ufile);
    ruleSourceU[charsRead] = 0;

    ucnv_close(conv);
    u_fclose(ufile);
    /* u_finit() leaves ownership of the FILE* with the caller, so close it here. */
    fclose(file);

    /* Create the break iterator from the rules */
    /* This will compile the rules. */
    UParseError parseError;
    parseError.line = 0;
    parseError.offset = 0;
    /* Pass the number of UChars actually read; the byte size of the file can
       exceed it for multi-byte encodings. */
    return_me = ubrk_openRules(ruleSourceU, charsRead, NULL, 0, &parseError, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
                u_errorName(status), (int)parseError.line, (int)parseError.offset);
        exit(1);
    }

    return return_me;
}