Example #1
0
void BytesTrieTest::checkNextWithState(BytesTrie &trie,
                                       const StringAndValue data[], int32_t dataLength) {
    BytesTrie::State noState, state;
    for(int32_t i=0; i<dataLength; ++i) {
        if((i&1)==0) {
            // This should have no effect.
            trie.resetToState(noState);
        }
        const char *expectedString=data[i].s;
        int32_t stringLength=strlen(expectedString);
        int32_t partialLength=stringLength/3;
        for(int32_t j=0; j<partialLength; ++j) {
            if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
                return;
            }
        }
        trie.saveState(state);
        UStringTrieResult resultAtState=trie.current();
        UStringTrieResult result;
        int32_t valueAtState=-99;
        if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
            valueAtState=trie.getValue();
        }
        result=trie.next(0);  // mismatch
        if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
            errln("trie.next(0) matched after part of %s", data[i].s);
        }
        if( resultAtState!=trie.resetToState(state).current() ||
            (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
        ) {
            errln("trie.next(part of %s) changes current()/getValue() after "
                  "saveState/next(0)/resetToState",
                  data[i].s);
        } else if(!USTRINGTRIE_HAS_VALUE(
                      result=trie.next(expectedString+partialLength,
                                       stringLength-partialLength)) ||
                  result!=trie.current()) {
            errln("trie.next(rest of %s) does not seem to contain %s after "
                  "saveState/next(0)/resetToState",
                  data[i].s, data[i].s);
        } else if(!USTRINGTRIE_HAS_VALUE(
                      result=trie.resetToState(state).
                                  next(expectedString+partialLength,
                                       stringLength-partialLength)) ||
                  result!=trie.current()) {
            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
                  data[i].s);
        } else if(trie.getValue()!=data[i].value) {
            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
                  data[i].s,
                  (long)trie.getValue(), (long)trie.getValue(),
                  (long)data[i].value, (long)data[i].value);
        }
        trie.reset();
    }
}
Example #2
0
 virtual void call(UErrorCode * /*pErrorCode*/) {
     if(noDict) {
         return;
     }
     const ULine *lines=perf.getCachedLines();
     int32_t numLines=perf.getNumLines();
     for(int32_t i=0; i<numLines; ++i) {
         const UChar *line=lines[i].name;
         // Skip comment lines (start with a character below 'A').
         if(line[0]<0x41) {
             continue;
         }
         UStringTrieResult result=trie->first(thaiCharToByte(line[0]));
         int32_t lineLength=lines[i].len;
         for(int32_t j=1; j<lineLength; ++j) {
             if(!USTRINGTRIE_HAS_NEXT(result)) {
                 fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
                 break;
             }
             result=trie->next(thaiCharToByte(line[j]));
         }
         if(!USTRINGTRIE_HAS_VALUE(result)) {
             fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
         }
     }
 }
Example #3
0
static int32_t
bytesTrieMatches(BytesTrie &trie,
                 UText *text, int32_t textLimit,
                 int32_t *lengths, int &count, int limit ) {
    UChar32 c=utext_next32(text);
    if(c<0) {
        return 0;
    }
    UStringTrieResult result=trie.first(thaiCharToByte(c));
    int32_t numChars=1;
    count=0;
    for(;;) {
        if(USTRINGTRIE_HAS_VALUE(result)) {
            if(count<limit) {
                // lengths[count++]=(int32_t)utext_getNativeIndex(text);
                lengths[count++]=numChars;  // CompactTrieDictionary just counts chars too.
            }
            if(result==USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        } else if(result==USTRINGTRIE_NO_MATCH) {
            break;
        }
        if(numChars>=textLimit) {
            break;
        }
        UChar32 c=utext_next32(text);
        if(c<0) {
            break;
        }
        ++numChars;
        result=trie.next(thaiCharToByte(c));
    }
    return numChars;
}
Example #4
0
static int32_t bytesTrieLookup(const char *s, const char *nameTrieBytes) {
    BytesTrie trie(nameTrieBytes);
    if(USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) {
        return trie.getValue();
    } else {
        return -1;
    }
}
Example #5
0
// Closely imitate CompactTrieDictionary::matches().
// Note: CompactTrieDictionary::matches() is part of its trie implementation,
// and while it loops over the text, it knows the current state.
// By contrast, this implementation uses UCharsTrie API functions that have to
// check the trie state each time and load/store state in the object.
// (Whether it hasNext() and whether it is in the middle of a linear-match node.)
static int32_t
ucharsTrieMatches(UCharsTrie &trie,
                  UText *text, int32_t textLimit,
                  int32_t *lengths, int &count, int limit ) {
    UChar32 c=utext_next32(text);
    // Notes:
    // a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
    // b) It also ignores non-BMP code points by casting to UChar!
    if(c<0) {
        return 0;
    }
    // Should be firstForCodePoint() but CompactTrieDictionary
    // handles only code units.
    UStringTrieResult result=trie.first(c);
    int32_t numChars=1;
    count=0;
    for(;;) {
        if(USTRINGTRIE_HAS_VALUE(result)) {
            if(count<limit) {
                // lengths[count++]=(int32_t)utext_getNativeIndex(text);
                lengths[count++]=numChars;  // CompactTrieDictionary just counts chars too.
            }
            if(result==USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        } else if(result==USTRINGTRIE_NO_MATCH) {
            break;
        }
        if(numChars>=textLimit) {
            // Note: Why do we have both a text limit and a UText that knows its length?
            break;
        }
        UChar32 c=utext_next32(text);
        // Notes:
        // a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
        // b) It also ignores non-BMP code points by casting to UChar!
        if(c<0) {
            break;
        }
        ++numChars;
        // Should be nextForCodePoint() but CompactTrieDictionary
        // handles only code units.
        result=trie.next(c);
    }
#if 0
    // Note: CompactTrieDictionary::matches() comments say that it leaves the UText
    // after the longest prefix match and returns the number of characters
    // that were matched.
    if(index!=lastMatch) {
        utext_setNativeIndex(text, lastMatch);
    }
    return lastMatch-start;
    // However, it does not do either of these, so I am not trying to
    // imitate it (or its docs) 100%.
#endif
    return numChars;
}
Example #6
0
void BytesTrieTest::checkFirst(BytesTrie &trie,
                               const StringAndValue data[], int32_t dataLength) {
    for(int32_t i=0; i<dataLength; ++i) {
        int c=*data[i].s;
        if(c==0) {
            continue;  // skip empty string
        }
        UStringTrieResult firstResult=trie.first(c);
        int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
        UStringTrieResult nextResult=trie.next(data[i].s[1]);
        if(firstResult!=trie.reset().next(c) ||
           firstResult!=trie.current() ||
           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
           nextResult!=trie.next(data[i].s[1])
        ) {
            errln("trie.first(%c)!=trie.reset().next(same) for %s",
                  c, data[i].s);
        }
    }
    trie.reset();
}
Example #7
0
 virtual void call(UErrorCode * /*pErrorCode*/) {
     const ULine *lines=perf.getCachedLines();
     int32_t numLines=perf.getNumLines();
     for(int32_t i=0; i<numLines; ++i) {
         // Skip comment lines (which start with a character below 'A').
         if(lines[i].name[0]<0x41) {
             continue;
         }
         if(!USTRINGTRIE_HAS_VALUE(trie->reset().next(lines[i].name, lines[i].len))) {
             fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
         }
     }
 }
Example #8
0
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
                            int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {

    UCharsTrie uct(characters);
    int32_t startingTextIndex = utext_getNativeIndex(text);
    int32_t wordCount = 0;
    int32_t codePointsMatched = 0;

    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
        UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
        int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
        codePointsMatched += 1;
        if (ignoreSet != NULL && ignoreSet->contains(c)) {
            continue;
        }
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (codePointsMatched < minLength) {
                continue;
            }
            if (wordCount < limit) {
                if (values != NULL) {
                    values[wordCount] = uct.getValue();
                }
                if (lengths != NULL) {
                    lengths[wordCount] = lengthMatched;
                }
                if (cpLengths != NULL) {
                    cpLengths[wordCount] = codePointsMatched;
                }
                ++wordCount;
            }
            if (result == USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        }
        else if (result == USTRINGTRIE_NO_MATCH) {
            break;
        }
        if (lengthMatched >= maxLength) {
            break;
        }
    }

    if (prefix != NULL) {
        *prefix = codePointsMatched;
    }
    return wordCount;
}
Example #9
0
UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
    if(name==NULL) {
        return FALSE;
    }
    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
    char c;
    while((c=*name++)!=0) {
        c=uprv_invCharToLowercaseAscii(c);
        // Ignore delimiters '-', '_', and ASCII White_Space.
        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
            continue;
        }
        if(!USTRINGTRIE_HAS_NEXT(result)) {
            return FALSE;
        }
        result=trie.next((uint8_t)c);
    }
    return USTRINGTRIE_HAS_VALUE(result);
}
Example #10
0
int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
                            int32_t *prefix) const {
    BytesTrie bt(characters);
    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
    int32_t wordCount = 0;
    int32_t codePointsMatched = 0;

    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
        UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
        codePointsMatched += 1;
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (wordCount < limit) {
                if (values != NULL) {
                    values[wordCount] = bt.getValue();
                }
                if (lengths != NULL) {
                    lengths[wordCount] = lengthMatched;
                }
                if (cpLengths != NULL) {
                    cpLengths[wordCount] = codePointsMatched;
                }
                ++wordCount;
            }
            if (result == USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        }
        else if (result == USTRINGTRIE_NO_MATCH) {
            break;
        }
        if (lengthMatched >= maxLength) {
            break;
        }
    }

    if (prefix != NULL) {
        *prefix = codePointsMatched;
    }
    return wordCount;
}
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
    UCharsTrie uct(characters);
    UChar32 c = utext_next32(text);
    if (c < 0) {
        return 0;
    }
    UStringTrieResult result = uct.first(c);
    int32_t numChars = 1;
    count = 0;
    for (;;) {
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (count < limit) {
                if (values != NULL) {
                    values[count] = uct.getValue();
                }
                lengths[count++] = numChars;
            }
            if (result == USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        }
        else if (result == USTRINGTRIE_NO_MATCH) {
            break;
        }

        // TODO: why do we have a text limit if the UText knows its length?
        if (numChars >= maxLength) {
            break;
        }

        c = utext_next32(text);
        if (c < 0) {
            break;
        }
        ++numChars;
        result = uct.next(c);
    }
    return numChars;
}
Example #12
0
void BytesTrieTest::checkNext(BytesTrie &trie,
                              const StringAndValue data[], int32_t dataLength) {
    BytesTrie::State state;
    for(int32_t i=0; i<dataLength; ++i) {
        int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
        UStringTrieResult result;
        if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
            result!=trie.current()
        ) {
            errln("trie does not seem to contain %s", data[i].s);
        } else if(trie.getValue()!=data[i].value) {
            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
                  data[i].s,
                  (long)trie.getValue(), (long)trie.getValue(),
                  (long)data[i].value, (long)data[i].value);
        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
        }
        trie.reset();
        stringLength=strlen(data[i].s);
        result=trie.current();
        for(int32_t j=0; j<stringLength; ++j) {
            if(!USTRINGTRIE_HAS_NEXT(result)) {
                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
                break;
            }
            if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
                trie.getValue();
                if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
                    errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
                    break;
                }
            }
            result=trie.next(data[i].s[j]);
            if(!USTRINGTRIE_MATCHES(result)) {
                errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
                break;
            }
            if(result!=trie.current()) {
                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
                break;
            }
        }
        if(!USTRINGTRIE_HAS_VALUE(result)) {
            errln("trie.next()!=hasValue at the end of %s", data[i].s);
            continue;
        }
        trie.getValue();
        if(result!=trie.current()) {
            errln("trie.current() != current()+getValue()+current() after end of %s",
                  data[i].s);
        }
        // Compare the final current() with whether next() can actually continue.
        trie.saveState(state);
        UBool nextContinues=FALSE;
        // Try all graphic characters; we only use those in test strings in this file.
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
        const int32_t minChar=0x20;
        const int32_t maxChar=0x7e;
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
        const int32_t minChar=0x40;
        const int32_t maxChar=0xfe;
#else
        const int32_t minChar=0;
        const int32_t maxChar=0xff;
#endif
        for(int32_t c=minChar; c<=maxChar; ++c) {
            if(trie.resetToState(state).next(c)) {
                nextContinues=TRUE;
                break;
            }
        }
        if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
            errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
                  "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
        }
        trie.reset();
    }
}