FoundState ChinaFindPhoneNum(const UChar* chars, unsigned length, FindState* s) { #define PREPARE_GOTO_NEXT() \ *store++ = ch; \ pattern++; \ lastDigit = chars; char* pattern = s->mPattern; UChar* store = s->mStorePtr; const UChar* start = chars; const UChar* end = chars + length; const UChar* lastDigit = 0; string16 search16(chars, length); std::string searchSpace = UTF16ToUTF8(search16); retry: do { bool initialized = s->mInitialized; while (chars < end) { if (initialized == false) { s->mBackThree = s->mBackTwo; s->mBackTwo = s->mBackOne; s->mBackOne = s->mCurrent; } UChar ch = s->mCurrent = *chars; do { char patternChar = *pattern; switch (patternChar) { case '1': if (initialized == false) { s->mStartResult = chars - start; initialized = true; } if (ch != patternChar) { goto resetPattern; } PREPARE_GOTO_NEXT(); goto nextChar; case '3': if (ch != '3' && ch != '5' && ch != '8') { goto resetPattern; } PREPARE_GOTO_NEXT(); goto nextChar; case '0': if (ch < patternChar || ch > '9') goto resetPattern; PREPARE_GOTO_NEXT(); goto nextChar; case '\0': if (WTF::isASCIIDigit(ch) == false) { *store = '\0'; goto checkMatch; } goto resetPattern; case ' ': if (ch == patternChar) goto nextChar; break; default: commonPunctuation: if (ch == patternChar) { pattern++; goto nextChar; } } } while (++pattern); // never false nextChar: chars++; } break; resetPattern: if (s->mContinuationNode) return FOUND_NONE; ChinaFindResetNumber(s); pattern = s->mPattern; store = s->mStorePtr; } while (++chars < end); checkMatch: if (WTF::isASCIIDigit((s->mBackOne == '6' && s->mBackTwo == '8') ? s->mBackThree : s->mBackOne) || s->mBackOne == '+') { if(++chars < end) { if (s->mContinuationNode) { return FOUND_NONE; } ChinaFindResetNumber(s); pattern = s->mPattern; store = s->mStorePtr; goto retry; } else { return FOUND_NONE; } } *store = '\0'; s->mStorePtr = store; s->mPattern = pattern; s->mEndResult = lastDigit - start + 1; char pState = pattern[0]; return pState == '\0' ? 
FOUND_COMPLETE : FOUND_NONE; }
// Scans chars[0, length) for a phone number, matching the text against the
// pattern string held in s->mPattern and copying every accepted digit to
// s->mStorePtr. A number may be only partly present in this text.
//
// Pattern characters handled by the matcher:
//   '2'      like '0'/'1' below, and additionally records the match start the
//            first time it is reached
//   '0','1'  the text must contain a digit in the range patternChar..'9'
//   ' '      optional space(s): a space in the text is consumed without
//            advancing the pattern, anything else skips the pattern character
//   '('      optional; when present in the text it records the match start
//            and sets s->mOpenParen
//   ')'      must be present exactly when s->mOpenParen is set
//   '\0'     end of pattern: a non-digit in the text ends the match, a digit
//            resets it
//   other    optional literal: consumed if the text matches, skipped otherwise
//
// chars  - text to scan.
// length - number of UChars available at chars.
// s      - matcher state; mCurrent, mBackOne/mBackTwo, mStartResult,
//          mEndResult, mOpenParen, mStorePtr and mPattern are updated here.
//
// Returns FOUND_COMPLETE when scanning stops with the pattern at its
// terminating '\0'; FOUND_NONE when it stops at '(' or between two digit
// pattern characters, or when the candidate is rejected; FOUND_PARTIAL
// otherwise.
FoundState FindPartialNumber(const UChar* chars, unsigned length, FindState* s)
{
    char* pattern = s->mPattern;
    UChar* store = s->mStorePtr;
    const UChar* start = chars;
    const UChar* end = chars + length;
    const UChar* lastDigit = 0;
retry:
    do {
        bool initialized = s->mInitialized;
        while (chars < end) {
            // Until a match has started, keep a history of the two
            // characters seen before the current one.
            if (initialized == false) {
                s->mBackTwo = s->mBackOne;
                s->mBackOne = s->mCurrent;
            }
            UChar ch = s->mCurrent = *chars;
            // Try the current character against successive pattern
            // characters until one consumes it or rejects the match.
            do {
                char patternChar = *pattern;
                switch (patternChar) {
                case '2':
                    if (initialized == false) {
                        s->mStartResult = chars - start;
                        initialized = true;
                    }
                    // fall through
                case '0':
                case '1':
                    if (ch < patternChar || ch > '9')
                        goto resetPattern;
                    *store++ = ch;
                    pattern++;
                    lastDigit = chars;
                    goto nextChar;
                case '\0':
                    // Pattern exhausted: the number is only accepted when it
                    // is not immediately followed by another digit.
                    if (WTF::isASCIIDigit(ch) == false) {
                        *store = '\0';
                        goto checkMatch;
                    }
                    goto resetPattern;
                case ' ':
                    // The pattern is intentionally not advanced, so a run of
                    // spaces in the text is consumed by this one entry.
                    if (ch == patternChar)
                        goto nextChar;
                    break;
                case '(':
                    if (ch == patternChar) {
                        s->mStartResult = chars - start;
                        initialized = true;
                        s->mOpenParen = true;
                    }
                    goto commonPunctuation;
                case ')':
                    // A closing parenthesis is required iff one was opened.
                    if ((ch == patternChar) ^ s->mOpenParen)
                        goto resetPattern;
                    // fall through
                default:
commonPunctuation:
                    if (ch == patternChar) {
                        pattern++;
                        goto nextChar;
                    }
                }
            } while (++pattern); // never false
nextChar:
            chars++;
        }
        // Ran out of text without a mismatch.
        break;
resetPattern:
        // A mismatch inside a continuation node abandons the search;
        // otherwise restart the pattern at the next character.
        if (s->mContinuationNode)
            return FOUND_NONE;
        FindResetNumber(s);
        pattern = s->mPattern;
        store = s->mStorePtr;
    } while (++chars < end);
checkMatch:
    /*
     * A few interesting cases:
     *  03122572251 3122572251   # two numbers, s->mBackOne = 0, return second
     *  013122572251 3122572251  # two numbers, s->mBackOne = 1, s->mBackTwo = 0, return second
     *  113122572251 3122572251  # two numbers, s->mBackOne = 1, s->mBackTwo = 1, return second
     *
     * In each case the first candidate is preceded by "0", "01" or "11" and
     * is therefore rejected. The text still contains a second, valid phone
     * number, so instead of giving up the scan is restarted after the
     * rejected candidate so that the second number can be detected.
     */
    if (WTF::isASCIIDigit(s->mBackOne != '1' ? s->mBackOne : s->mBackTwo)) {
        if (++chars < end) {
            if (s->mContinuationNode) {
                return FOUND_NONE;
            }
            FindResetNumber(s);
            pattern = s->mPattern;
            store = s->mStorePtr;
            goto retry;
        } else {
            return FOUND_NONE;
        }
    }
    *store = '\0';
    s->mStorePtr = store;
    s->mPattern = pattern;
    // NOTE(review): lastDigit is still null if no digit was stored during this
    // call, in which case this value is not meaningful -- confirm that callers
    // ignore mEndResult then.
    s->mEndResult = lastDigit - start + 1;
    char pState = pattern[0];
    return pState == '\0' ? FOUND_COMPLETE
        : pState == '(' || (WTF::isASCIIDigit(pState) && WTF::isASCIIDigit(pattern[-1])) ? FOUND_NONE
        : FOUND_PARTIAL;
}
// Scans chars[0, length) for a phone number, matching the text against the
// pattern string held in s->mPattern and copying every accepted character to
// s->mStorePtr. A number may be only partly present in this text.
//
// Pattern characters handled by the matcher:
//   '2'      like '0'/'1' below, and additionally records the match start the
//            first time it is reached
//   '0','1'  the text must contain a digit '0'..'9' or a '+'
//   '9'      optional digit: stored if the text has a digit, skipped otherwise
//   ' ','S'  optional space(s): a space in the text is consumed without
//            advancing the pattern, anything else skips the pattern character
//   '('      optional; when present in the text it records the match start
//            and sets s->mOpenParen
//   ')'      must be present exactly when s->mOpenParen is set
//   '\0'     end of pattern: a non-digit in the text ends the match, a digit
//            resets it
//   other    optional literal: consumed if the text matches, skipped otherwise
//
// chars  - text to scan.
// length - number of UChars available at chars.
// s      - matcher state; mCurrent, mBackOne/mBackTwo, mStartResult,
//          mEndResult, mOpenParen, mStorePtr and mPattern are updated here.
//
// Returns FOUND_COMPLETE when between 7 and 14 characters were stored since
// the last reset in this call and the pattern stopped at '\0', '9' or 'S';
// FOUND_NONE when the candidate is preceded by a digit, or the pattern stopped
// at '(' or between two digit pattern characters; FOUND_PARTIAL otherwise.
FoundState FindPartialNumber(const UChar* chars, unsigned length, FindState* s)
{
    char* pattern = s->mPattern;
    UChar* store = s->mStorePtr;
    // CAPPFIX_WEB_NUMBER_PASSING
    // Number of characters stored since the last reset during this call.
    int len = 0;
    // CAPPFIX_WEB_NUMBER_PASSING_END
    const UChar* start = chars;
    const UChar* end = chars + length;
    const UChar* lastDigit = 0;
    do {
        bool initialized = s->mInitialized;
        while (chars < end) {
            // Until a match has started, keep a history of the two
            // characters seen before the current one.
            if (initialized == false) {
                s->mBackTwo = s->mBackOne;
                s->mBackOne = s->mCurrent;
            }
            UChar ch = s->mCurrent = *chars;
            // Try the current character against successive pattern
            // characters until one consumes it or rejects the match.
            do {
                char patternChar = *pattern;
                switch (patternChar) {
                case '2':
                    if (initialized == false) {
                        s->mStartResult = chars - start;
                        initialized = true;
                    }
                    // fall through
                case '0':
                case '1':
                    // CAPPFIX_WEB_NUMBER_PASSING
                    // Any digit is accepted here, and so is '+'.
                    if ((ch < '0' || ch > '9') && ch != '+')
                        goto resetPattern;
                    *store++ = ch;
                    len++;
                    // CAPPFIX_WEB_NUMBER_PASSING_END
                    pattern++;
                    lastDigit = chars;
                    goto nextChar;
                // CAPPFIX_WEB_NUMBER_PASSING
                case '9':
                    // Optional digit: a non-digit just skips this entry.
                    if (ch < '0' || ch > '9') {
                        break;
                    } else {
                        *store++ = ch;
                        len++;
                        pattern++;
                        lastDigit = chars;
                        goto nextChar;
                    }
                // CAPPFIX_WEB_NUMBER_PASSING_END
                case '\0':
                    // Pattern exhausted: the number is only accepted when it
                    // is not immediately followed by another digit.
                    if (WTF::isASCIIDigit(ch) == false) {
                        *store = '\0';
                        goto checkMatch;
                    }
                    goto resetPattern;
                case ' ':
                // CAPPFIX_WEB_NUMBER_PASSING
                case 'S':
                    // The pattern is intentionally not advanced, so a run of
                    // spaces in the text is consumed by this one entry.
                    if (ch == ' ')
                        goto nextChar;
                    break;
                // CAPPFIX_WEB_NUMBER_PASSING_END
                case '(':
                    if (ch == patternChar) {
                        s->mStartResult = chars - start;
                        initialized = true;
                        s->mOpenParen = true;
                    }
                    goto commonPunctuation;
                case ')':
                    // A closing parenthesis is required iff one was opened.
                    if ((ch == patternChar) ^ s->mOpenParen)
                        goto resetPattern;
                    // fall through
                default:
commonPunctuation:
                    if (ch == patternChar) {
                        pattern++;
                        goto nextChar;
                    }
                }
            } while (++pattern); // never false
nextChar:
            chars++;
        }
        // Ran out of text without a mismatch.
        break;
resetPattern:
        // A mismatch inside a continuation node abandons the search;
        // otherwise restart the pattern at the next character.
        if (s->mContinuationNode)
            return FOUND_NONE;
        FindResetNumber(s);
        pattern = s->mPattern;
        store = s->mStorePtr;
        // CAPPFIX_WEB_NUMBER_PASSING
        len = 0;
        // CAPPFIX_WEB_NUMBER_PASSING_END
    } while (++chars < end);
checkMatch:
    // Reject the candidate when it is directly preceded by a digit (looking
    // one character further back when the preceding character is '1').
    if (WTF::isASCIIDigit(s->mBackOne != '1' ? s->mBackOne : s->mBackTwo)) {
        return FOUND_NONE;
    }
    *store = '\0';
    s->mStorePtr = store;
    s->mPattern = pattern;
    // NOTE(review): lastDigit is still null if no digit was stored during this
    // call, in which case this value is not meaningful -- confirm that callers
    // ignore mEndResult then.
    s->mEndResult = lastDigit - start + 1;
    char pState = pattern[0];
    // CAPPFIX_WEB_NUMBER_PASSING
    // NOTE(review): len only counts characters stored by this call, not ones
    // stored by an earlier call for a preceding node -- confirm this is the
    // intended length check for numbers split across nodes.
    if (len > 6 && len < 15 && (pState == '\0' || pState == '9' || pState == 'S'))
        return FOUND_COMPLETE;
    if (pState == '(' || (WTF::isASCIIDigit(pState) && WTF::isASCIIDigit(pattern[-1])))
        return FOUND_NONE;
    return FOUND_PARTIAL;
    // CAPPFIX_WEB_NUMBER_PASSING_END
}