Beispiel #1
1
//----------------------------------------------------------------------------------------
//
//   ubrk_openRules      open a break iterator from a set of break rules.
//                       Invokes the rule builder.
//
//----------------------------------------------------------------------------------------
U_CAPI UBreakIterator* U_EXPORT2
ubrk_openRules(  const UChar        *rules,
                       int32_t       rulesLength,
                 const UChar        *text,
                       int32_t       textLength,
                       UParseError  *parseErr,
                       UErrorCode   *status)  {

    if (status == NULL || U_FAILURE(*status)){
        return 0;
    }

    BreakIterator *result = 0;
    UnicodeString ruleString(rules, rulesLength);
    result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, *parseErr, *status);
    if(U_FAILURE(*status)) {
        return 0;
    }

    if (text != NULL) {
        UCharCharacterIterator *iter = 0;
        iter = new UCharCharacterIterator(text, textLength);
        if(iter == 0) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            delete result;
            return 0;
        }
        result->adoptText(iter);
    }
    return (UBreakIterator *)result;
}
Beispiel #2
0
//----------------------------------------------------------------------------------------
//
//    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
//                   and locale.
//
//----------------------------------------------------------------------------------------
U_CAPI UBreakIterator* U_EXPORT2
ubrk_open(UBreakIteratorType type,
      const char *locale,
      const UChar *text,
      int32_t textLength,
      UErrorCode *status)
{

  if(U_FAILURE(*status)) return 0;

  BreakIterator *result = 0;

  switch(type) {

  case UBRK_CHARACTER:
    result = BreakIterator::createCharacterInstance(Locale(locale), *status);
    break;

  case UBRK_WORD:
    result = BreakIterator::createWordInstance(Locale(locale), *status);
    break;

  case UBRK_LINE:
    result = BreakIterator::createLineInstance(Locale(locale), *status);
    break;

  case UBRK_SENTENCE:
    result = BreakIterator::createSentenceInstance(Locale(locale), *status);
    break;

  case UBRK_TITLE:
    result = BreakIterator::createTitleInstance(Locale(locale), *status);
    break;
  }

  // check for allocation error
  if (U_FAILURE(*status)) {
     return 0;
  }
  if(result == 0) {
    *status = U_MEMORY_ALLOCATION_ERROR;
    return 0;
  }

  if (text != NULL) {
      UCharCharacterIterator *iter = 0;
      iter = new UCharCharacterIterator(text, textLength);
      if(iter == 0) {
          *status = U_MEMORY_ALLOCATION_ERROR;
          delete result;
          return 0;
      }
      result->adoptText(iter);
  }

  return (UBreakIterator*)result;
}
Beispiel #3
0
/*
 * This method does the acutal break comparison and reports the results.
 * It uses a SpaceBreakIterator to iterate over the text with spaces,
 * and a word instance of a Thai BreakIterator to iterate over the text
 * without spaces.
 */
UBool ThaiWordbreakTest::compareWordBreaks(const UChar *spaces, int32_t spaceCount,
                                           const UChar *noSpaces, int32_t noSpaceCount)
{
    UBool result = TRUE;
    Locale thai("th");
    UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(noSpaces, noSpaceCount);
    UErrorCode status = U_ZERO_ERROR;
    
    BreakIterator *breakIter = BreakIterator::createWordInstance(thai, status);
    breakIter->adoptText(noSpaceIter);
    
    SpaceBreakIterator spaceIter(spaces, spaceCount);
    
    int32_t nextBreak = 0;
    int32_t nextSpaceBreak = 0;
    int32_t iterCount = 0;
    
    while (TRUE) {
        nextSpaceBreak = spaceIter.next();
        nextBreak = breakIter->next();
        
        if (nextSpaceBreak == BreakIterator::DONE || nextBreak == BreakIterator::DONE) {
            if (nextBreak != BreakIterator::DONE) {
                fprintf(stderr, "break iterator didn't end.\n");
            } else if (nextSpaceBreak != BreakIterator::DONE) {
                fprintf(stderr, "premature break iterator end.\n");
            }
            
            break;
        }
        
        while (nextSpaceBreak != nextBreak &&
               nextSpaceBreak != BreakIterator::DONE && nextBreak != BreakIterator::DONE) {
            if (nextSpaceBreak < nextBreak) {
                breakNotFound(nextSpaceBreak);
                result = FALSE;
                nextSpaceBreak = spaceIter.next();
            } else if (nextSpaceBreak > nextBreak) {
                foundInvalidBreak(nextBreak);
                result = FALSE;
                nextBreak = breakIter->next();
            }
        }
        
        if (fVerbose) {
            printf("%d   %d\n", nextSpaceBreak, nextBreak);
        }
    }
        
   
    fWordCount = spaceIter.getWordCount();
    
    delete breakIter;

    return result;
}
Beispiel #4
0
/*
 * Generate a text file with spaces in it from a file without.
 */
int generateFile(const UChar *chars, int32_t length) {
    Locale root("");
    UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(chars, length);
    UErrorCode status = U_ZERO_ERROR;
    
    UnicodeSet complexContext(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status);
    BreakIterator *breakIter = BreakIterator::createWordInstance(root, status);
    breakIter->adoptText(noSpaceIter);
    char outbuf[1024];
    int32_t strlength;
    UChar bom = 0xFEFF;
    
    printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &bom, 1, &status));
    int32_t prevbreak = 0;
    while (U_SUCCESS(status)) {
        int32_t nextbreak = breakIter->next();
        if (nextbreak == BreakIterator::DONE) {
            break;
        }
        printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &chars[prevbreak],
                                    nextbreak-prevbreak, &status));
        if (nextbreak > 0 && complexContext.contains(chars[nextbreak-1])
            && complexContext.contains(chars[nextbreak])) {
            printf(" ");
        }
        prevbreak = nextbreak;
    }
    
    if (U_FAILURE(status)) {
        fprintf(stderr, "generate failed: %s\n", u_errorName(status));
        return status;
    }
    else {
        return 0;
    }
}