Exemplo n.º 1
0
/**
 * Replaces the text to be searched with the contents of a CharacterIterator.
 *
 * The iterator's text is copied into the member m_text_, whose buffer and
 * length are then passed on to usearch_setText().
 *
 * @param text   iterator supplying the new text
 * @param status in/out error code; nothing is done if it already signals
 *               a failure on entry
 */
void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
{
    if (!U_SUCCESS(status)) {
        return;
    }
    text.getText(m_text_);
    usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
}
Exemplo n.º 2
0
/**
 * Sets the text to be searched from a CharacterIterator.
 *
 * The iterator's text is copied into the member m_text_, and the work is
 * then delegated to the setText overload that takes that string.
 *
 * @param text   iterator supplying the new text
 * @param status in/out error code; nothing is done if it already signals
 *               a failure on entry
 */
void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
{
    if (!U_SUCCESS(status)) {
        return;
    }
    text.getText(m_text_);
    setText(m_text_, status);
}
Exemplo n.º 3
0
/**
 * Prints the offsets [start, end) followed by the break iterator's text,
 * with '|' markers inserted at the start and end offsets.
 *
 * Output format: " <start> <end>\t<prefix>|<range>|<suffix>" and a newline.
 *
 * @param iterator break iterator whose text is printed
 * @param start    offset of the first marker
 * @param end      offset of the second marker
 */
void printTextRange( BreakIterator& iterator, 
                    int32_t start, int32_t end )
{
    // Work on a clone so the break iterator's own CharacterIterator is
    // never touched.
    CharacterIterator *strIter = iterator.getText().clone();
    if (strIter == NULL) {
        // clone() could not produce a copy; there is no text to print and
        // dereferencing the result would crash.
        return;
    }

    UnicodeString  s;
    strIter->getText(s);

    printf(" %ld %ld\t", (long)start, (long)end);
    printUnicodeString(UnicodeString(s, 0, start));          // text before the range
    printf("|");
    printUnicodeString(UnicodeString(s, start, end-start));  // the range itself
    printf("|");
    printUnicodeString(UnicodeString(s, end));               // text after the range
    puts("");
    delete strIter;
}
Exemplo n.º 4
0
/**
 * Constructs a Normalizer that iterates over a clone of the given
 * CharacterIterator using the given normalization mode.
 *
 * Options start at 0, both index members and the buffer position start at 0,
 * and the remaining setup is delegated to init().
 *
 * @param iter iterator over the input text; it is cloned, not retained
 * @param mode normalization mode stored in fUMode
 */
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
    : UObject(),
      fFilteredNorm2(NULL),
      fNorm2(NULL),
      fUMode(mode),
      fOptions(0),
      text(iter.clone()),
      currentIndex(0),
      nextIndex(0),
      buffer(),
      bufferPos(0)
{
    init();
}
Exemplo n.º 5
0
// Sets the source text from a character iterator: the iterator's text is
// copied into the member string_ and the call is forwarded to the setText
// overload taking that string. Does nothing when status already holds a
// failure on entry.
void CollationElementIterator::setText(CharacterIterator& source, 
                                       UErrorCode& status)
{
    if (!U_FAILURE(status)) {
        source.getText(string_);
        setText(string_, status);
    }
}
Exemplo n.º 6
0
// Sets the source to the text of the given character iterator.
//
// A fresh UChar buffer is allocated with uprv_malloc, filled with a copy of
// the iterator's text and installed into m_data_->iteratordata_. For an empty
// source a one-element buffer holding a single 0 is used. On allocation
// failure status is set to U_MEMORY_ALLOCATION_ERROR and the current text is
// left in place. Does nothing when status already holds a failure on entry.
void CollationElementIterator::setText(CharacterIterator& source, 
                                       UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return;
    }

    int32_t textLength = source.getLength();

    /* An empty source still gets a one-element buffer. */
    UChar *copy = (UChar *)uprv_malloc(
        textLength == 0 ? U_SIZEOF_UCHAR : U_SIZEOF_UCHAR * textLength);
    if (copy == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    if (textLength == 0) {
        *copy = 0;
    }
    else {
        /* Pull the text out through a temporary string, then copy it over. */
        UnicodeString contents;
        source.getText(contents);
        u_memcpy(copy, contents.getBuffer(), textLength);
    }

    /* Release the previous text only if this iterator was marked writable. */
    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
        uprv_free((UChar *)m_data_->iteratordata_.string);
    }
    m_data_->isWritable = TRUE;

    /* Free offsetBuffer before initializing it. */
    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    uprv_init_collIterate(m_data_->iteratordata_.coll, copy, textLength, 
        &m_data_->iteratordata_, &status);
    m_data_->reset_   = TRUE;
}
Exemplo n.º 7
0
/**
 * Set the input text over which this <tt>Normalizer</tt> will iterate.
 * The iteration position is set to the beginning of the string.
 */
void
Normalizer::setText(const CharacterIterator& newText, 
                    UErrorCode &status) 
{
    if (U_FAILURE(status)) {
        return;
    }
    CharacterIterator *newIter = newText.clone();
    if (newIter == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    delete text;
    text = newIter;
    reset();
}
Exemplo n.º 8
0
/**
 * Constructs a SearchIterator over the text of a CharacterIterator.
 *
 * The iterator's text is copied into m_text_, and a freshly allocated
 * USearch record is set up to point at that copy with default settings:
 * forward searching, no overlap, no canonical match, no match found yet.
 *
 * @param text      iterator supplying the text to search
 * @param breakiter break iterator stored in m_breakiterator_ (may be NULL)
 */
SearchIterator::SearchIterator(CharacterIterator &text, 
                               BreakIterator     *breakiter) :
                               m_breakiterator_(breakiter)
{
    // NOTE(review): the allocation result is used without a NULL check; this
    // constructor has no error-code parameter through which to report it.
    USearch *state = (USearch *)uprv_malloc(sizeof(USearch));

    // Search flags and match bookkeeping.
    state->breakIter          = NULL;
    state->isOverlap          = FALSE;
    state->isCanonicalMatch   = FALSE;
    state->isForwardSearching = TRUE;
    state->reset              = TRUE;
    state->matchedIndex       = USEARCH_DONE;
    state->matchedLength      = 0;

    // Point the record at this object's own copy of the text.
    text.getText(m_text_);
    state->text               = m_text_.getBuffer();
    state->textLength         = m_text_.length();

    m_search_ = state;
}
Exemplo n.º 9
0
/** 
* This is the "real" constructor for this class; it constructs an iterator over 
* the source text using the specified collator.
*
* The text of sourceText is copied into a buffer allocated with uprv_malloc,
* which is handed to ucol_openElements(); the resulting element iterator is
* then marked writable. For an empty source a one-element buffer holding a
* single 0 is used.
*
* On every failure path m_data_ is left as NULL, never uninitialized, and the
* buffer is freed again if ucol_openElements() returns no object.
*
* @param sourceText iterator over the text to collate
* @param order      collator whose ucollator is used to open the elements
* @param status     in/out error code; set to U_MEMORY_ALLOCATION_ERROR if
*                   the buffer cannot be allocated
*/
CollationElementIterator::CollationElementIterator(
                                           const CharacterIterator& sourceText,
                                           const RuleBasedCollator* order,
                                           UErrorCode& status)
                                           : isDataOwned_(TRUE)
{
    // Give m_data_ a defined value before any early return, so that no later
    // code ever sees an indeterminate pointer.
    m_data_ = NULL;

    if (U_FAILURE(status))
        return;

    // **** should I just drop this test? ****
    /*
    if ( sourceText.endIndex() != 0 )
    {
        // A CollationElementIterator is really a two-layered beast.
        // Internally it uses a Normalizer to munge the source text into a form 
        // where all "composed" Unicode characters (such as \u00FC) are split into a 
        // normal character and a combining accent character.  
        // Afterward, CollationElementIterator does its own processing to handle
        // expanding and contracting collation sequences, ignorables, and so on.
        
        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
                                ? Normalizer::NO_OP : order->getDecomposition();
          
        text = new Normalizer(sourceText, decomp);
        if (text == NULL)
        status = U_MEMORY_ALLOCATION_ERROR;    
    }
    */
    int32_t length = sourceText.getLength();
    UChar *buffer;
    if (length > 0) {
        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
        /* test for NULL */
        if (buffer == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        /* 
        Using this constructor will prevent buffer from being removed when
        string gets removed
        */
        UnicodeString string(buffer, length, length);
        // getText() is non-const, so constness has to be cast away here.
        const_cast<CharacterIterator &>(sourceText).getText(string);
        const UChar *temp = string.getBuffer();
        u_memcpy(buffer, temp, length);
    }
    else {
        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
        /* test for NULL */
        if (buffer == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        *buffer = 0;
    }
    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);

    if (m_data_ == NULL) {
        // No element iterator was created, so nothing references the buffer;
        // release it here instead of leaking it.
        uprv_free(buffer);
        return;
    }

    /* Test for buffer overflows */
    if (U_FAILURE(status)) {
        return;
    }
    m_data_->isWritable = TRUE;
}
Exemplo n.º 10
0
/**
 * Drives a UCharIterator and a CharacterIterator in lockstep and reports an
 * error whenever they disagree.
 *
 * Each character of moves selects one operation applied to both iterators:
 *   '0' current()          '|' current32()
 *   '+' next (post-inc)    '>' next32 (post-inc)
 *   '-' previous()         '<' previous32()
 *   '2' move +2 from the current position
 *   '8' move -2 from the current position
 * After each step the returned values, the hasNext/hasPrevious results and
 * the current indexes are compared.
 *
 * @param iter  UCharIterator under test
 * @param ci    CharacterIterator used as the reference
 * @param moves NUL-terminated script of move characters
 * @param which label for iter, used in error messages
 */
void CharIterTest::TestUCharIterator(UCharIterator *iter, CharacterIterator &ci,
                                     const char *moves, const char *which) {
    for(int32_t step=0;; ++step) {
        const char move=moves[step];
        UChar32 iterChar, ciChar;
        UBool iterHas, ciHas;

        // end of the script
        if(move==0) {
            return;
        }

        // apply the same move to both iterators
        switch(move) {
        case '0':
            iterHas=iter->hasNext(iter);
            ciHas=ci.hasNext();
            iterChar=iter->current(iter);
            ciChar=ci.current();
            break;
        case '|':
            iterHas=iter->hasNext(iter);
            ciHas=ci.hasNext();
            iterChar=uiter_current32(iter);
            ciChar=ci.current32();
            break;

        case '+':
            iterHas=iter->hasNext(iter);
            ciHas=ci.hasNext();
            iterChar=iter->next(iter);
            ciChar=ci.nextPostInc();
            break;
        case '>':
            iterHas=iter->hasNext(iter);
            ciHas=ci.hasNext();
            iterChar=uiter_next32(iter);
            ciChar=ci.next32PostInc();
            break;

        case '-':
            iterHas=iter->hasPrevious(iter);
            ciHas=ci.hasPrevious();
            iterChar=iter->previous(iter);
            ciChar=ci.previous();
            break;
        case '<':
            iterHas=iter->hasPrevious(iter);
            ciHas=ci.hasPrevious();
            iterChar=uiter_previous32(iter);
            ciChar=ci.previous32();
            break;

        case '2':
            // move() results are compared in place of characters; the "has" flags are not used
            iterHas=FALSE;
            ciHas=FALSE;
            iterChar=(UChar32)iter->move(iter, 2, UITER_CURRENT);
            ciChar=(UChar32)ci.move(2, CharacterIterator::kCurrent);
            break;

        case '8':
            iterHas=FALSE;
            ciHas=FALSE;
            iterChar=(UChar32)iter->move(iter, -2, UITER_CURRENT);
            ciChar=(UChar32)ci.move(-2, CharacterIterator::kCurrent);
            break;

        default:
            errln("error: unexpected move character '%c' in \"%s\"", move, moves);
            return;
        }

        // map a 0xffff result from the CharacterIterator to -1 before comparing
        if(ciChar==0xffff) {
            ciChar=(UChar32)-1;
        }

        // values, "has" flags and positions must all agree
        if(!(iterChar==ciChar && iterHas==ciHas && ci.getIndex()==iter->getIndex(iter, UITER_CURRENT))) {
            errln("error: UCharIterator(%s) misbehaving at \"%s\"[%d]='%c'", which, moves, step, move);
        }
    }
}
Exemplo n.º 11
0
/**
 * Exercises the text accessors of RuleBasedBreakIterator: getText(),
 * setText(UnicodeString), adoptText(CharacterIterator*), setText(UText*) and
 * getUText(). Checks that setting text resets the iteration position to 0,
 * that each iterator reports the text it was given, and that word boundaries
 * over UTF-8 UText input land at the expected offsets.
 */
void RBBIAPITest::TestGetSetAdoptText()
{
    logln((UnicodeString)"Testing getText setText ");
    IcuTestErrorCode status(*this, "TestGetSetAdoptText");
    UnicodeString str1="first string.";
    UnicodeString str2="Second string.";
    LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
    LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
    // Bail out before allocating anything else if either iterator could not be created.
    if(status.isFailure()){
        errcheckln(status, "Fail : in construction - %s", status.errorName());
            return;
    }


    // Raw CharacterIterators; each one is later handed to an adoptText() call below.
    CharacterIterator* text1= new StringCharacterIterator(str1);
    CharacterIterator* text1Clone = text1->clone();
    CharacterIterator* text2= new StringCharacterIterator(str2);
    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"

    // setText(str1): getText() must return str1 and the position must be 0.
    wordIter1->setText(str1);
    CharacterIterator *tci = &wordIter1->getText();
    UnicodeString      tstr;
    tci->getText(tstr);
    TEST_ASSERT(tstr == str1);
    if(wordIter1->current() != 0)
        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");

    // Move away from the start so the next setText() has a position to reset.
    wordIter1->next(2);

    wordIter1->setText(str2);
    if(wordIter1->current() != 0)
        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");


    // charIter1 now holds str1 (via text1Clone) while wordIter1 holds str2;
    // the two iterators must report different texts.
    charIter1->adoptText(text1Clone);
    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
    tci = &wordIter1->getText();
    tci->getText(tstr);
    TEST_ASSERT(tstr == str2);
    tci = &charIter1->getText();
    tci->getText(tstr);
    TEST_ASSERT(tstr == str1);


    // adoptText() on a clone of wordIter1: getText() must compare equal to
    // the iterator that was just handed over.
    LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
    rb->adoptText(text1);
    if(rb->getText() != *text1)
        errln((UnicodeString)"ERROR:1 error in adoptText ");
    rb->adoptText(text2);
    if(rb->getText() != *text2)
        errln((UnicodeString)"ERROR:2 error in adoptText ");

    // Adopt where iterator range is less than the entire original source string.
    //   (With the change of the break engine to working with UText internally,
    //    CharacterIterators starting at positions other than zero are not supported)
    rb->adoptText(text3);
    TEST_ASSERT(rb->preceding(2) == 0);
    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
    //if(rb->preceding(2) != 3) {
    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
    //}
    //if(rb->following(11) != BreakIterator::DONE) {
    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
    //}

    // UText API
    //
    //   Quick test to see if UText is working at all.
    //
    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
    //                012345678901

    status.reset();
    LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
    wordIter1->setText(ut.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);

    // Forward iteration over "hello world": boundaries at 0, 5, 6, 11, then done.
    int32_t pos;
    pos = wordIter1->first();
    TEST_ASSERT(pos==0);
    pos = wordIter1->next();
    TEST_ASSERT(pos==5);
    pos = wordIter1->next();
    TEST_ASSERT(pos==6);
    pos = wordIter1->next();
    TEST_ASSERT(pos==11);
    pos = wordIter1->next();
    TEST_ASSERT(pos==UBRK_DONE);

    status.reset();
    LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
    TEST_ASSERT_SUCCESS(status);
    wordIter1->setText(ut2.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);

    // Forward iteration over "see ya": boundaries at 0, 3, 4.
    pos = wordIter1->first();
    TEST_ASSERT(pos==0);
    pos = wordIter1->next();
    TEST_ASSERT(pos==3);
    pos = wordIter1->next();
    TEST_ASSERT(pos==4);

    // Backward iteration from the end: 6, 4, 3, 0, then done.
    pos = wordIter1->last();
    TEST_ASSERT(pos==6);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==4);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==3);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==0);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==UBRK_DONE);

    // getUText() called with an existing UText (opened over an empty string)
    // as its fill-in argument must succeed.
    status.reset();
    UnicodeString sEmpty;
    LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
    wordIter1->getUText(gut2.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);
    status.reset();
}