/*
 * Scans chars[0, length) for a number matching the pattern in s->mPattern,
 * copying every matched digit to s->mStorePtr.
 *
 * Pattern characters understood by this matcher:
 *   '1'    the text must contain a literal '1'; if no match is in progress
 *          the current offset is recorded in s->mStartResult first
 *   '3'    the text must contain '3', '5' or '8'
 *   '0'    the text must contain any digit '0'..'9'
 *   ' '    a space in the text is consumed without advancing the pattern (so
 *          a run of spaces is skipped); any other character moves on to the
 *          next pattern character
 *   '\0'   end of pattern: a following digit rejects the candidate, any other
 *          character proceeds to the final prefix check
 *   other  optional literal: consumed when the text matches, skipped otherwise
 *
 * A mismatch resets the matcher through ChinaFindResetNumber() and scanning
 * resumes at the next character, unless s->mContinuationNode is set, in which
 * case FOUND_NONE is returned immediately.
 *
 * On the normal exit path the stored digits are NUL-terminated and
 * s->mStorePtr, s->mPattern and s->mEndResult are updated.
 *
 * Returns FOUND_COMPLETE when the whole pattern was consumed and the prefix
 * check passed, FOUND_NONE otherwise.
 */
FoundState ChinaFindPhoneNum(const UChar* chars, unsigned length,
    FindState* s)
{
    // Records one matched digit: store it, advance the pattern and remember
    // where the digit was seen. Wrapped in do/while(0) so it behaves as a
    // single statement, and undefined again at the end of the function.
    #define PREPARE_GOTO_NEXT() \
        do { \
            *store++ = ch; \
            pattern++; \
            lastDigit = chars; \
        } while (0)

    char* pattern = s->mPattern;
    UChar* store = s->mStorePtr;
    const UChar* start = chars;
    const UChar* end = chars + length;
    const UChar* lastDigit = 0;
retry:
    do {
        bool initialized = s->mInitialized;
        while (chars < end) {
            // Until a match has started, keep a three-character history of
            // the text that precedes the current character.
            if (initialized == false) {
                s->mBackThree = s->mBackTwo;
                s->mBackTwo = s->mBackOne;
                s->mBackOne = s->mCurrent;
            }
            UChar ch = s->mCurrent = *chars;
            // Try successive pattern characters against ch until one either
            // consumes it (goto nextChar) or rejects it (goto resetPattern).
            do {
                char patternChar = *pattern;
                switch (patternChar) {
                    case '1':
                        if (initialized == false) {
                            s->mStartResult = chars - start;
                            initialized = true;
                        }
                        if (ch != patternChar) {
                            goto resetPattern;
                        }
                        PREPARE_GOTO_NEXT();
                        goto nextChar;
                    case '3':
                        if (ch != '3' && ch != '5' && ch != '8') {
                            goto resetPattern;
                        }
                        PREPARE_GOTO_NEXT();
                        goto nextChar;
                    case '0':
                        if (ch < patternChar || ch > '9')
                            goto resetPattern;
                        PREPARE_GOTO_NEXT();
                        goto nextChar;
                    case '\0':
                        if (WTF::isASCIIDigit(ch) == false) {
                            *store = '\0';
                            goto checkMatch;
                        }
                        goto resetPattern;
                    case ' ':
                        if (ch == patternChar)
                            goto nextChar;
                        break;
                    default:
                        if (ch == patternChar) {
                            pattern++;
                            goto nextChar;
                        }
                }
            } while (++pattern); // never false
    nextChar:
            chars++;
        }
        break;
resetPattern:
        if (s->mContinuationNode)
            return FOUND_NONE;
        ChinaFindResetNumber(s);
        pattern = s->mPattern;
        store = s->mStorePtr;
    } while (++chars < end);
checkMatch:
    // Reject the candidate when the character in front of it is a digit or
    // '+'. If the two preceding characters are "86", the character before
    // those is inspected instead (presumably to allow a country-code prefix;
    // confirm against the caller).
    if (WTF::isASCIIDigit((s->mBackOne == '6' && s->mBackTwo == '8') ?
            s->mBackThree : s->mBackOne) || s->mBackOne == '+') {
        // Test chars < end first so the pointer is never advanced beyond
        // one-past-the-end of the buffer.
        if (chars < end && ++chars < end) {
            if (s->mContinuationNode) {
                return FOUND_NONE;
            }
            ChinaFindResetNumber(s);
            pattern = s->mPattern;
            store = s->mStorePtr;
            goto retry;
        } else {
            return FOUND_NONE;
        }
    }
    *store = '\0';
    s->mStorePtr = store;
    s->mPattern = pattern;
    // lastDigit is still null when no digit was consumed from this buffer;
    // report offset 0 instead of doing arithmetic on a null pointer.
    s->mEndResult = lastDigit ? lastDigit - start + 1 : 0;
    char pState = pattern[0];
    return pState == '\0' ? FOUND_COMPLETE : FOUND_NONE;

    #undef PREPARE_GOTO_NEXT
}
/*
 * Scans chars[0, length) for a phone number matching the pattern in
 * s->mPattern, copying every matched digit to s->mStorePtr.
 *
 * Pattern characters understood by this matcher:
 *   '2'    digit in '2'..'9'; if no match is in progress the current offset
 *          is recorded in s->mStartResult first
 *   '1'    digit in '1'..'9'
 *   '0'    digit in '0'..'9'
 *   ' '    a space in the text is consumed without advancing the pattern;
 *          any other character moves on to the next pattern character
 *   '('    optional; when present it marks the start of the result and sets
 *          s->mOpenParen
 *   ')'    must be present exactly when s->mOpenParen is set
 *   '\0'   end of pattern: a following digit rejects the candidate, any other
 *          character proceeds to the final prefix check
 *   other  optional literal: consumed when the text matches, skipped otherwise
 *
 * A mismatch resets the matcher through FindResetNumber() and scanning
 * resumes at the next character, unless s->mContinuationNode is set, in which
 * case FOUND_NONE is returned immediately.
 *
 * On the normal exit path the stored digits are NUL-terminated and
 * s->mStorePtr, s->mPattern and s->mEndResult are updated.
 *
 * Returns FOUND_COMPLETE when the whole pattern was consumed, FOUND_NONE when
 * the scan stopped at '(' or between two digit pattern characters, and
 * FOUND_PARTIAL otherwise.
 */
FoundState FindPartialNumber(const UChar* chars, unsigned length,
    FindState* s)
{
    char* pattern = s->mPattern;
    UChar* store = s->mStorePtr;
    const UChar* start = chars;
    const UChar* end = chars + length;
    const UChar* lastDigit = 0;
retry:
    do {
        bool initialized = s->mInitialized;
        while (chars < end) {
            // Until a match has started, keep a two-character history of the
            // text that precedes the current character.
            if (initialized == false) {
                s->mBackTwo = s->mBackOne;
                s->mBackOne = s->mCurrent;
            }
            UChar ch = s->mCurrent = *chars;
            // Try successive pattern characters against ch until one either
            // consumes it (goto nextChar) or rejects it (goto resetPattern).
            do {
                char patternChar = *pattern;
                switch (patternChar) {
                    case '2':
                        if (initialized == false) {
                            s->mStartResult = chars - start;
                            initialized = true;
                        }
                        // fall through: '2' is range-checked like '0' and '1'
                    case '0':
                    case '1':
                        // The pattern digit is the lowest digit accepted.
                        if (ch < patternChar || ch > '9')
                            goto resetPattern;
                        *store++ = ch;
                        pattern++;
                        lastDigit = chars;
                        goto nextChar;
                    case '\0':
                        if (WTF::isASCIIDigit(ch) == false) {
                            *store = '\0';
                            goto checkMatch;
                        }
                        goto resetPattern;
                    case ' ':
                        if (ch == patternChar)
                            goto nextChar;
                        break;
                    case '(':
                        if (ch == patternChar) {
                            s->mStartResult = chars - start;
                            initialized = true;
                            s->mOpenParen = true;
                        }
                        goto commonPunctuation;
                    case ')':
                        // A ')' without a recorded '(' or a recorded '('
                        // without a ')' both reject the candidate.
                        if ((ch == patternChar) ^ s->mOpenParen)
                            goto resetPattern;
                        // fall through
                    default:
                    commonPunctuation:
                        if (ch == patternChar) {
                            pattern++;
                            goto nextChar;
                        }
                }
            } while (++pattern); // never false
    nextChar:
            chars++;
        }
        break;
resetPattern:
        if (s->mContinuationNode)
            return FOUND_NONE;
        FindResetNumber(s);
        pattern = s->mPattern;
        store = s->mStorePtr;
    } while (++chars < end);
checkMatch:
    /*
     * A few interesting cases:
     *  03122572251 3122572251     # two numbers, s->mBackOne = 0,                  return second
     *  013122572251 3122572251    # two numbers, s->mBackOne = 1, s->mBackTwo = 0, return second
     *  113122572251 3122572251    # two numbers, s->mBackOne = 1, s->mBackTwo = 1, return second
     *
     *  In the cases above the US phone number is prefixed by "0", "01" or
     *  "11", which makes the first group invalid. The text being scanned
     *  still contains a second, valid number, so instead of giving up the
     *  matcher is reset and the scan is retried from the next character.
     */
    if (WTF::isASCIIDigit(s->mBackOne != '1' ? s->mBackOne : s->mBackTwo)) {
        // Test chars < end first so the pointer is never advanced beyond
        // one-past-the-end of the buffer.
        if (chars < end && ++chars < end) {
            if (s->mContinuationNode) {
                return FOUND_NONE;
            }
            FindResetNumber(s);
            pattern = s->mPattern;
            store = s->mStorePtr;
            goto retry;
        } else {
            return FOUND_NONE;
        }
    }
    *store = '\0';
    s->mStorePtr = store;
    s->mPattern = pattern;
    // lastDigit is still null when no digit was consumed from this buffer;
    // report offset 0 instead of doing arithmetic on a null pointer.
    s->mEndResult = lastDigit ? lastDigit - start + 1 : 0;
    char pState = pattern[0];
    return pState == '\0' ? FOUND_COMPLETE : pState == '(' || (WTF::isASCIIDigit(pState) && WTF::isASCIIDigit(pattern[-1])) ?
        FOUND_NONE : FOUND_PARTIAL;
}
// Example no. 3
// 0
/*
 * Scans chars[0, length) for a phone number matching the pattern in
 * s->mPattern, copying every matched character to s->mStorePtr and counting
 * the characters stored during this call in len.
 *
 * Pattern characters understood by this matcher:
 *   '2'       like '0'/'1' below; if no match is in progress the current
 *             offset is recorded in s->mStartResult first
 *   '0', '1'  the text must contain a digit '0'..'9' or '+'
 *   '9'       optional digit: stored when the text has a digit, otherwise
 *             the pattern character is skipped
 *   ' ', 'S'  a space in the text is consumed without advancing the pattern;
 *             any other character moves on to the next pattern character
 *   '('       optional; when present it marks the start of the result and
 *             sets s->mOpenParen
 *   ')'       must be present exactly when s->mOpenParen is set
 *   '\0'      end of pattern: a following digit rejects the candidate, any
 *             other character proceeds to the final check
 *   other     optional literal: consumed when the text matches, skipped
 *             otherwise
 *
 * A mismatch resets the matcher through FindResetNumber() and scanning
 * resumes at the next character, unless s->mContinuationNode is set, in which
 * case FOUND_NONE is returned immediately.
 *
 * Returns FOUND_NONE when the text in front of the candidate is a digit (see
 * checkMatch). Otherwise returns FOUND_COMPLETE when between 7 and 14
 * characters were stored and the scan stopped at '\0', '9' or 'S';
 * FOUND_NONE when it stopped at '(' or between two digit pattern characters;
 * and FOUND_PARTIAL in every other case.
 *
 * NOTE(review): len only counts characters stored by this call, so digits
 * stored by an earlier call on a previous node are not included - confirm
 * this is intended for continuation nodes.
 */
FoundState FindPartialNumber(const UChar* chars, unsigned length,
    FindState* s)
{
    char* pattern = s->mPattern;
    UChar* store = s->mStorePtr;
    // CAPPFIX_WEB_NUMBER_PASSING
    int len = 0;
    // CAPPFIX_WEB_NUMBER_PASSING_END
    const UChar* start = chars;
    const UChar* end = chars + length;
    const UChar* lastDigit = 0;
    do {
        bool initialized = s->mInitialized;
        while (chars < end) {
            // Until a match has started, keep a two-character history of the
            // text that precedes the current character.
            if (initialized == false) {
                s->mBackTwo = s->mBackOne;
                s->mBackOne = s->mCurrent;
            }
            UChar ch = s->mCurrent = *chars;
            // Try successive pattern characters against ch until one either
            // consumes it (goto nextChar) or rejects it (goto resetPattern).
            do {
                char patternChar = *pattern;
                switch (patternChar) {
                    case '2':
                        if (initialized == false) {
                            s->mStartResult = chars - start;
                            initialized = true;
                        }
                        // fall through: '2' is checked like '0' and '1'
                    case '0':
                    case '1':
                        // CAPPFIX_WEB_NUMBER_PASSING
                        // Accept any digit, and additionally '+'.
                        if ((ch < '0' || ch > '9') && ch != '+')
                            goto resetPattern;
                        *store++ = ch;
                        len++;
                        // CAPPFIX_WEB_NUMBER_PASSING_END
                        pattern++;
                        lastDigit = chars;
                        goto nextChar;
                    // CAPPFIX_WEB_NUMBER_PASSING
                    case '9':
                        if (ch < '0' || ch > '9') {
                            // Optional digit is absent: try the next
                            // pattern character against the same ch.
                            break;
                        } else {
                            *store++ = ch;
                            len++;
                            pattern++;
                            lastDigit = chars;
                            goto nextChar;
                        }
                    // CAPPFIX_WEB_NUMBER_PASSING_END
                    case '\0':
                        if (WTF::isASCIIDigit(ch) == false) {
                            *store = '\0';
                            goto checkMatch;
                        }
                        goto resetPattern;
                    case ' ':
                    // CAPPFIX_WEB_NUMBER_PASSING
                    case 'S':
                        if (ch == ' ')
                            goto nextChar;
                        break;
                    // CAPPFIX_WEB_NUMBER_PASSING_END
                    case '(':
                        if (ch == patternChar) {
                            s->mStartResult = chars - start;
                            initialized = true;
                            s->mOpenParen = true;
                        }
                        goto commonPunctuation;
                    case ')':
                        // A ')' without a recorded '(' or a recorded '('
                        // without a ')' both reject the candidate.
                        if ((ch == patternChar) ^ s->mOpenParen)
                            goto resetPattern;
                        // fall through
                    default:
                    commonPunctuation:
                        if (ch == patternChar) {
                            pattern++;
                            goto nextChar;
                        }
                }
            } while (++pattern); // never false
    nextChar:
            chars++;
        }
        break;
resetPattern:
        if (s->mContinuationNode)
            return FOUND_NONE;
        FindResetNumber(s);
        pattern = s->mPattern;
        store = s->mStorePtr;
        // CAPPFIX_WEB_NUMBER_PASSING
        len = 0;
        // CAPPFIX_WEB_NUMBER_PASSING_END
    } while (++chars < end);
checkMatch:
    // Reject the candidate when the character in front of it is a digit; if
    // that character is '1', the one before it is inspected instead.
    if (WTF::isASCIIDigit(s->mBackOne != '1' ? s->mBackOne : s->mBackTwo)) {
        return FOUND_NONE;
    }
    *store = '\0';
    s->mStorePtr = store;
    s->mPattern = pattern;
    // lastDigit is still null when no digit was consumed from this buffer;
    // report offset 0 instead of doing arithmetic on a null pointer.
    s->mEndResult = lastDigit ? lastDigit - start + 1 : 0;
    char pState = pattern[0];

    // CAPPFIX_WEB_NUMBER_PASSING
    if ( len > 6 && len < 15)
        if ( (pState == '\0') || (pState == '9') || (pState == 'S') )
            return FOUND_COMPLETE;

    if ( pState == '(' || (WTF::isASCIIDigit(pState) && WTF::isASCIIDigit(pattern[-1])) )
        return FOUND_NONE;
    else
        return FOUND_PARTIAL;
    // CAPPFIX_WEB_NUMBER_PASSING_END
}