Пример #1
0
// Builds a sort key for a MojString.
//
// str    - input string; it is converted to a UChar buffer with
//          MojDbTextUtils::strToUnicode before the key is computed.
// keyOut - receives the key produced by the (pointer, length) overload of
//          sortKey(), which this function delegates to.
//
// Returns MojErrNone when both steps succeed. Failures are handled by
// MojErrCheck (presumably an early return of the error code - confirm
// against the macro definition).
MojErr MojDbTextCollator::sortKey(const MojString& str, MojDbKey& keyOut) const
{
	// Step 1: utf8 -> UChar buffer.
	MojDbTextUtils::UnicodeVec unicodeBuf;
	MojErr convErr = MojDbTextUtils::strToUnicode(str, unicodeBuf);
	MojErrCheck(convErr);

	// Step 2: hand the converted characters to the UChar-based overload.
	MojErr keyErr = sortKey(unicodeBuf.begin(), unicodeBuf.size(), keyOut);
	MojErrCheck(keyErr);

	return MojErrNone;
}
Пример #2
0
// Computes the sort key of `str` and stores it in `keyOut`.
//
// The string is first converted to a UChar buffer through
// MojDbTextUtils::strToUnicode; the resulting characters are then passed to
// the (pointer, length) overload of sortKey(), which fills `keyOut`.
//
// Returns MojErrNone on success. Errors from either step go through
// MojErrCheck (presumably returning the error code immediately - confirm
// against the macro definition).
MojErr MojDbTextCollator::sortKey(const MojString& str, MojDbKey& keyOut) const
{
	LOG_TRACE("Entering function %s", __FUNCTION__);

	// utf8 input -> UChar buffer
	MojDbTextUtils::UnicodeVec unicodeBuf;
	MojErr convErr = MojDbTextUtils::strToUnicode(str, unicodeBuf);
	MojErrCheck(convErr);

	// delegate key generation to the UChar-based overload
	MojErr keyErr = sortKey(unicodeBuf.begin(), unicodeBuf.size(), keyOut);
	MojErrCheck(keyErr);

	return MojErrNone;
}
Пример #3
0
// Splits `text` at the boundaries reported by an ICU break iterator and adds
// one key per accepted span to `keysOut`.
//
// text     - string to tokenize; converted to a UChar buffer with
//            MojDbTextUtils::strToUnicode.
// collator - optional. When non-null, each token's key is produced by
//            collator->sortKey(); when null, the token is converted back to a
//            MojString and assigned to the key as-is.
// keysOut  - receives every generated key via put().
//
// Returns MojErrNone once all boundaries have been visited. Failures are
// routed through MojErrCheck / MojUnicodeErrCheck (presumably an early return
// of the error code - confirm against the macro definitions).
MojErr MojDbTextTokenizer::tokenize(const MojString& text, MojDbTextCollator* collator, KeySet& keysOut) const
{
    LOG_TRACE("Entering function %s", __FUNCTION__);
    MojAssert(m_ubrk.get());

    // The break iterator operates on UChar data, so convert the input first.
    MojDbTextUtils::UnicodeVec utext;
    MojErr err = MojDbTextUtils::strToUnicode(text, utext);
    MojErrCheck(err);

    // Iterate with a clone of the member iterator (cloned into a local
    // buffer) so that the text is set on the clone, not on m_ubrk.
    MojByte cloneBuf[U_BRK_SAFECLONE_BUFFERSIZE];
    MojInt32 cloneBufSize = sizeof(cloneBuf);
    UErrorCode icuStatus = U_ZERO_ERROR;
    IterPtr iter(ubrk_safeClone(m_ubrk.get(), cloneBuf, &cloneBufSize, &icuStatus));
    MojUnicodeErrCheck(icuStatus);
    MojAssert(iter.get());
    ubrk_setText(iter.get(), utext.begin(), static_cast<MojInt32>(utext.size()), &icuStatus);
    MojUnicodeErrCheck(icuStatus);

    // Visit the boundaries in order; [prevBoundary, boundary) is the span
    // that ends at the boundary currently under inspection.
    MojInt32 prevBoundary = -1;
    for (MojInt32 boundary = ubrk_first(iter.get());
         boundary != UBRK_DONE;
         prevBoundary = boundary, boundary = ubrk_next(iter.get())) {
        // Spans whose rule status is UBRK_WORD_NONE produce no key.
        const UWordBreak wordKind = static_cast<UWordBreak>(ubrk_getRuleStatus(iter.get()));
        if (wordKind == UBRK_WORD_NONE) {
            continue;
        }

        // An accepted span needs a preceding boundary to start from.
        MojAssert(prevBoundary != -1);
        MojDbKey key;
        const UChar* tokenChars = utext.begin() + prevBoundary;
        MojSize tokenLen = static_cast<MojSize>(boundary - prevBoundary);

        if (!collator) {
            // No collator: use the token text itself as the key.
            MojString token;
            err = MojDbTextUtils::unicodeToStr(tokenChars, tokenLen, token);
            MojErrCheck(err);
            err = key.assign(token);
            MojErrCheck(err);
        } else {
            // Collator supplied: the key is the token's sort key.
            err = collator->sortKey(tokenChars, tokenLen, key);
            MojErrCheck(err);
        }

        err = keysOut.put(key);
        MojErrCheck(err);
    }

    return MojErrNone;
}