// Produces the next key-path token starting at m_ptr.
//
// ASCII whitespace in front of the token is skipped. Single-character
// punctuation ('[', ']', '.') is consumed here; anything starting with a
// decimal digit is handed to lexNumber(), and everything else to
// lexIdentifier(). Returns TokenEnd once the input is exhausted.
IDBKeyPathLexer::TokenType IDBKeyPathLexer::lex(IDBKeyPathElement& element)
{
    // Whitespace never belongs to a token.
    for (; m_ptr < m_end; ++m_ptr) {
        if (!isASCIISpace(*m_ptr))
            break;
    }

    if (m_ptr >= m_end)
        return TokenEnd;

    ASSERT(m_ptr < m_end);

    TokenType punctuation = TokenError;
    switch (*m_ptr) {
    case '[':
        punctuation = TokenLeftBracket;
        break;
    case ']':
        punctuation = TokenRightBracket;
        break;
    case '.':
        punctuation = TokenDot;
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return lexNumber(element);
    default:
        return lexIdentifier(element);
    }

    // Only the punctuation cases reach this point; consume the character.
    ++m_ptr;
    return punctuation;
}
// source-list = *WSP [ source *( 1*WSP source ) *WSP ] // / *WSP "'none'" *WSP // void CSPSourceList::parse(const UChar* begin, const UChar* end) { const UChar* position = begin; bool isFirstSourceInList = true; while (position < end) { skipWhile<isASCIISpace>(position, end); const UChar* beginSource = position; skipWhile<isSourceCharacter>(position, end); if (isFirstSourceInList && equalIgnoringCase("'none'", beginSource, position - beginSource)) return; // We represent 'none' as an empty m_list. isFirstSourceInList = false; String scheme, host; int port = 0; bool hostHasWildcard = false; bool portHasWildcard = false; if (parseSource(beginSource, position, scheme, host, port, hostHasWildcard, portHasWildcard)) { if (scheme.isEmpty()) scheme = m_origin->protocol(); m_list.append(CSPSource(scheme, host, port, hostHasWildcard, portHasWildcard)); } ASSERT(position == end || isASCIISpace(*position)); } }
// Extracts the suborigin name from |policy|.
//
// The name is a single run of ASCII alphanumerics, optionally surrounded by
// ASCII whitespace. On any violation the problem is reported through
// m_policy->reportInvalidSuboriginFlags() and a default-constructed String is
// returned.
String CSPDirectiveList::parseSuboriginName(const String& policy)
{
    Vector<UChar> buffer;
    policy.appendTo(buffer);

    const UChar* cursor = buffer.data();
    const UChar* limit = cursor + buffer.size();

    // Parse the name of the suborigin (no spaces, single string)
    skipWhile<UChar, isASCIISpace>(cursor, limit);
    if (cursor == limit) {
        m_policy->reportInvalidSuboriginFlags("No suborigin name specified.");
        return String();
    }

    const UChar* nameStart = cursor;
    skipWhile<UChar, isASCIIAlphanumeric>(cursor, limit);

    // The alphanumeric run must be followed by whitespace or the end of input.
    const bool nameEndsCleanly = cursor == limit || isASCIISpace(*cursor);
    if (!nameEndsCleanly) {
        m_policy->reportInvalidSuboriginFlags("Invalid character \'" + String(cursor, 1) + "\' in suborigin.");
        return String();
    }

    size_t nameLength = cursor - nameStart;

    // Anything other than trailing whitespace means a second word was given.
    skipWhile<UChar, isASCIISpace>(cursor, limit);
    if (cursor == limit)
        return String(nameStart, nameLength);

    m_policy->reportInvalidSuboriginFlags("Whitespace is not allowed in suborigin names.");
    return String();
}
void MediaListDirective::parse(const UChar* begin, const UChar* end) { const UChar* position = begin; // 'plugin-types ____;' OR 'plugin-types;' if (position == end) { policy()->reportInvalidPluginTypes(String()); return; } while (position < end) { // _____ OR _____mime1/mime1 // ^ ^ skipWhile<UChar, isASCIISpace>(position, end); if (position == end) return; // mime1/mime1 mime2/mime2 // ^ begin = position; if (!skipExactly<UChar, isMediaTypeCharacter>(position, end)) { skipWhile<UChar, isNotASCIISpace>(position, end); policy()->reportInvalidPluginTypes(String(begin, position - begin)); continue; } skipWhile<UChar, isMediaTypeCharacter>(position, end); // mime1/mime1 mime2/mime2 // ^ if (!skipExactly<UChar>(position, end, '/')) { skipWhile<UChar, isNotASCIISpace>(position, end); policy()->reportInvalidPluginTypes(String(begin, position - begin)); continue; } // mime1/mime1 mime2/mime2 // ^ if (!skipExactly<UChar, isMediaTypeCharacter>(position, end)) { skipWhile<UChar, isNotASCIISpace>(position, end); policy()->reportInvalidPluginTypes(String(begin, position - begin)); continue; } skipWhile<UChar, isMediaTypeCharacter>(position, end); // mime1/mime1 mime2/mime2 OR mime1/mime1 OR mime1/mime1/error // ^ ^ ^ if (position < end && isNotASCIISpace(*position)) { skipWhile<UChar, isNotASCIISpace>(position, end); policy()->reportInvalidPluginTypes(String(begin, position - begin)); continue; } m_pluginTypes.add(String(begin, position - begin)); ASSERT(position == end || isASCIISpace(*position)); } }
bool StringImpl::containsOnlyWhitespace() { // FIXME: The definition of whitespace here includes a number of characters // that are not whitespace from the point of view of RenderText; I wonder if // that's a problem in practice. for (unsigned i = 0; i < m_length; i++) if (!isASCIISpace(m_data[i])) return false; return true; }
// Reads the next whitespace-delimited token from |file|.
//
// Leading ASCII whitespace is skipped. Reading stops at EOF, at the first
// ASCII space that follows at least one token character, or when the buffer
// has no room left. Returns a default-constructed String when |file| is null.
static inline String nextToken(FILE* file)
{
    ASSERT(file);
    if (!file)
        return String();

    char buffer[maxBuffer] = {0, };
    unsigned int index = 0;
    // Stop one byte short of the buffer size: the zero-initialized final byte
    // must survive so the C string passed to String() is always terminated.
    while (index + 1 < maxBuffer) {
        int ch = fgetc(file);
        if (ch == EOF || (isASCIISpace(ch) && index)) // Break on non-initial ASCII space.
            break;
        if (!isASCIISpace(ch)) {
            buffer[index] = ch;
            index++;
        }
    }

    return String(buffer);
}
// Reads the next whitespace-delimited token from |file|.
//
// Leading ASCII whitespace is skipped. Reading stops at EOF, at the first
// ASCII space that follows at least one token character, or when the buffer
// has no room left. Returns a default-constructed String when |file| is null.
static inline String nextToken(FILE* file)
{
    if (!file)
        return String();

    static const unsigned bufferSize = 128;
    char buffer[bufferSize] = {0, };
    unsigned index = 0;
    // Stop one byte short of the buffer size: the zero-initialized final byte
    // must survive so the C string passed to String() is always terminated.
    while (index + 1 < bufferSize) {
        int ch = fgetc(file);
        if (ch == EOF || (isASCIISpace(ch) && index)) // Break on non-initial ASCII space.
            break;
        if (!isASCIISpace(ch)) {
            buffer[index] = ch;
            index++;
        }
    }

    return String(buffer);
}
static bool hasDisallowedCharacters(const char* str, size_t length) { while (length--) { char c = *str++; // '{' is also disallowed, but we don't need to check for it because // parseClause() searches for '{' as the end of the start delimiter. // As a result, the parsed delimiter string will never include '{'. if (c == '}' || isASCIISpace(c)) return true; } return false; }
// Returns |string| without its leading white space.
//
// A character counts as white space here when it is noBreakSpace, when it is
// in the ASCII range and satisfies isASCIISpace(), or when it is outside the
// ASCII range and direction() reports WhiteSpaceNeutral for it.
static String stripLeadingWhiteSpace(const String& string)
{
    const unsigned length = string.length();

    unsigned start = 0;
    for (; start < length; ++start) {
        if (string[start] == noBreakSpace)
            continue;

        bool isWhiteSpace;
        if (string[start] <= 0x7F)
            isWhiteSpace = isASCIISpace(string[start]);
        else
            isWhiteSpace = direction(string[start]) == WhiteSpaceNeutral;

        if (!isWhiteSpace)
            break;
    }

    return string.substring(start, length - start);
}
// Advances |s| past ASCII whitespace and parenthesized comments.
//
// Parentheses nest: everything inside a balanced "( ... )" is skipped. The
// scan stops at the terminating NUL or at the first non-space character that
// is seen outside of any parentheses (including an unmatched ')').
inline static void skipSpacesAndComments(const char*& s)
{
    int depth = 0;
    for (; *s; ++s) {
        const char current = *s;
        if (isASCIISpace(current))
            continue;

        if (current == '(') {
            ++depth;
            continue;
        }
        if (current == ')' && depth > 0) {
            --depth;
            continue;
        }

        // Ordinary character: it ends the skip unless we are inside a comment.
        if (!depth)
            break;
    }
}
// Parses a double from the first |length| characters of |data|, ignoring
// leading ASCII whitespace.
//
// |parsedLength| receives the number of characters consumed, including the
// skipped whitespace (it is left as reported by parseDouble(), i.e. zero,
// when nothing could be parsed). When |ok| is non-null it is set to false if
// nothing was parsed; otherwise to true if |policy| is AllowTrailingJunk or
// the whole input was consumed.
static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength)
{
    size_t skipped = 0;
    for (; skipped < length; ++skipped) {
        if (!isASCIISpace(data[skipped]))
            break;
    }

    const double value = parseDouble(data + skipped, length - skipped, parsedLength);

    if (parsedLength) {
        // Report the consumed length relative to the start of |data|.
        parsedLength += skipped;
        if (ok)
            *ok = policy == AllowTrailingJunk || parsedLength == length;
        return value;
    }

    if (ok)
        *ok = false;
    return 0.0;
}
bool DOMImplementation::isJSONMIMEType(const String& mimeType) { if (mimeType.startsWith("application/json", false)) return true; if (mimeType.startsWith("application/", false)) { size_t subtype = mimeType.find("+json", 12, false); if (subtype != kNotFound) { // Just check that a parameter wasn't matched. size_t parameterMarker = mimeType.find(";"); if (parameterMarker == kNotFound) { unsigned endSubtype = static_cast<unsigned>(subtype) + 5; return endSubtype == mimeType.length() || isASCIISpace(mimeType[endSubtype]); } return parameterMarker > subtype; } } return false; }
// source-list = *WSP [ source *( 1*WSP source ) *WSP ] // / *WSP "'none'" *WSP // void ContentSecurityPolicySourceList::parse(const UChar* begin, const UChar* end) { const UChar* position = begin; while (position < end) { skipWhile<UChar, isASCIISpace>(position, end); if (position == end) return; const UChar* beginSource = position; skipWhile<UChar, isSourceCharacter>(position, end); String scheme, host, path; int port = 0; bool hostHasWildcard = false; bool portHasWildcard = false; if (parseNonceSource(beginSource, position)) continue; if (parseHashSource(beginSource, position)) continue; if (parseSource(beginSource, position, scheme, host, port, path, hostHasWildcard, portHasWildcard)) { // Wildcard hosts and keyword sources ('self', 'unsafe-inline', // etc.) aren't stored in m_list, but as attributes on the source // list itself. if (scheme.isEmpty() && host.isEmpty()) continue; if (isCSPDirectiveName(host)) m_policy.reportDirectiveAsSourceExpression(m_directiveName, host); m_list.append(ContentSecurityPolicySource(m_policy, scheme, host, port, path, hostHasWildcard, portHasWildcard)); } else m_policy.reportInvalidSourceExpression(m_directiveName, String(beginSource, position - beginSource)); ASSERT(position == end || isASCIISpace(*position)); } }
// source-list = *WSP [ source *( 1*WSP source ) *WSP ] // / *WSP "'none'" *WSP // void CSPSourceList::parse(const UChar* begin, const UChar* end) { // We represent 'none' as an empty m_list. if (isSourceListNone(begin, end)) return; const UChar* position = begin; while (position < end) { skipWhile<UChar, isASCIISpace>(position, end); if (position == end) return; const UChar* beginSource = position; skipWhile<UChar, isSourceCharacter>(position, end); String scheme, host, path; int port = 0; CSPSource::WildcardDisposition hostWildcard = CSPSource::NoWildcard; CSPSource::WildcardDisposition portWildcard = CSPSource::NoWildcard; if (parseSource(beginSource, position, scheme, host, port, path, hostWildcard, portWildcard)) { // Wildcard hosts and keyword sources ('self', 'unsafe-inline', // etc.) aren't stored in m_list, but as attributes on the source // list itself. if (scheme.isEmpty() && host.isEmpty()) continue; if (m_policy->isDirectiveName(host)) m_policy->reportDirectiveAsSourceExpression(m_directiveName, host); m_list.append(CSPSource(m_policy, scheme, host, port, path, hostWildcard, portWildcard)); } else { m_policy->reportInvalidSourceExpression(m_directiveName, String(beginSource, position - beginSource)); } ASSERT(position == end || isASCIISpace(*position)); } }
// Parses the NUL-terminated date text in |dateString|.
//
// Returns ymdhmsToSeconds() of the parsed fields multiplied by msPerSecond,
// or a quiet NaN when the text cannot be parsed or has trailing garbage.
// |haveTZ| is set to true only when a time-zone designator (GMT/UTC, a
// numeric +/- offset, or an entry of knownZones) was recognized; |offset|
// receives the zone offset (the +HHMM arithmetic below yields hours * 60 +
// minutes, i.e. it looks like minutes). Both are reset on entry.
//
// NOTE(review): the historical comment here read "Odd case where 'exec' is
// allowed to be 0, to accommodate a caller in WebCore." -- no 'exec' parameter
// exists in this signature, so that remark appears to be stale.
double parseDateFromNullTerminatedCharacters(const char* dateString, bool& haveTZ, int& offset)
{
    haveTZ = false;
    offset = 0;

    // This parses a date in the form:
    //     Tuesday, 09-Nov-99 23:12:40 GMT
    // or
    //     Sat, 01-Jan-2000 08:00:00 GMT
    // or
    //     Sat, 01 Jan 2000 08:00:00 GMT
    // or
    //     01 Jan 99 22:00 +0100    (exceptions in rfc822/rfc2822)
    // ### non RFC formats, added for Javascript:
    //     [Wednesday] January 09 1999 23:12:40 GMT
    //     [Wednesday] January 09 23:12:40 GMT 1999
    //
    // We ignore the weekday.

    // Skip leading space
    skipSpacesAndComments(dateString);

    long month = -1;
    const char *wordStart = dateString;
    // Check contents of first words if not number. Every completed word of
    // at least three characters is offered to findMonth(); the result of the
    // last such word wins.
    while (*dateString && !isASCIIDigit(*dateString)) {
        if (isASCIISpace(*dateString) || *dateString == '(') {
            if (dateString - wordStart >= 3)
                month = findMonth(wordStart);
            skipSpacesAndComments(dateString);
            wordStart = dateString;
        } else
            dateString++;
    }

    // Missing delimiter between month and day (like "January29")?
    if (month == -1 && wordStart != dateString)
        month = findMonth(wordStart);

    skipSpacesAndComments(dateString);

    if (!*dateString)
        return std::numeric_limits<double>::quiet_NaN();

    // ' 09-Nov-99 23:12:40 GMT'
    char* newPosStr;
    long day;
    if (!parseLong(dateString, &newPosStr, 10, &day))
        return std::numeric_limits<double>::quiet_NaN();
    dateString = newPosStr;

    if (day < 0)
        return std::numeric_limits<double>::quiet_NaN();

    // |year| stays empty until a year has actually been read.
    std::optional<int> year;
    if (day > 31) {
        // The first number is too large for a day, so it is taken as a year.
        // ### where is the boundary and what happens below?
        if (*dateString != '/')
            return std::numeric_limits<double>::quiet_NaN();
        // looks like a YYYY/MM/DD date
        if (!*++dateString)
            return std::numeric_limits<double>::quiet_NaN();
        // Reject values that do not fit strictly inside the int range before narrowing.
        if (day <= std::numeric_limits<int>::min() || day >= std::numeric_limits<int>::max())
            return std::numeric_limits<double>::quiet_NaN();
        year = static_cast<int>(day);
        if (!parseLong(dateString, &newPosStr, 10, &month))
            return std::numeric_limits<double>::quiet_NaN();
        month -= 1; // 0-based
        dateString = newPosStr;
        if (*dateString++ != '/' || !*dateString)
            return std::numeric_limits<double>::quiet_NaN();
        if (!parseLong(dateString, &newPosStr, 10, &day))
            return std::numeric_limits<double>::quiet_NaN();
        dateString = newPosStr;
    } else if (*dateString == '/' && month == -1) {
        dateString++;
        // This looks like a MM/DD/YYYY date, not an RFC date.
        month = day - 1; // 0-based
        if (!parseLong(dateString, &newPosStr, 10, &day))
            return std::numeric_limits<double>::quiet_NaN();
        if (day < 1 || day > 31)
            return std::numeric_limits<double>::quiet_NaN();
        dateString = newPosStr;
        if (*dateString == '/')
            dateString++;
        if (!*dateString)
            return std::numeric_limits<double>::quiet_NaN();
    } else {
        if (*dateString == '-')
            dateString++;

        skipSpacesAndComments(dateString);

        if (*dateString == ',')
            dateString++;

        if (month == -1) { // not found yet
            month = findMonth(dateString);
            if (month == -1)
                return std::numeric_limits<double>::quiet_NaN();

            // Skip the rest of the month name.
            while (*dateString && *dateString != '-' && *dateString != ',' && !isASCIISpace(*dateString))
                dateString++;

            if (!*dateString)
                return std::numeric_limits<double>::quiet_NaN();

            // '-99 23:12:40 GMT'
            if (*dateString != '-' && *dateString != '/' && *dateString != ',' && !isASCIISpace(*dateString))
                return std::numeric_limits<double>::quiet_NaN();
            dateString++;
        }
    }

    if (month < 0 || month > 11)
        return std::numeric_limits<double>::quiet_NaN();

    // '99 23:12:40 GMT'
    if (*dateString && !year) {
        int result = 0;
        if (!parseInt(dateString, &newPosStr, 10, &result))
            return std::numeric_limits<double>::quiet_NaN();
        year = result;
    }

    // Don't fail if the time is missing.
    long hour = 0;
    long minute = 0;
    long second = 0;
    // |newPosStr| points just past the most recently parsed number; note that
    // |dateString| was not advanced by the year parse above.
    if (!*newPosStr)
        dateString = newPosStr;
    else {
        // ' 23:12:40 GMT'
        if (!(isASCIISpace(*newPosStr) || *newPosStr == ',')) {
            if (*newPosStr != ':')
                return std::numeric_limits<double>::quiet_NaN();
            // There was no year; the number was the hour.
            year = std::nullopt;
        } else {
            // in the normal case (we parsed the year), advance to the next number
            dateString = ++newPosStr;
            skipSpacesAndComments(dateString);
        }

        parseLong(dateString, &newPosStr, 10, &hour);
        // Do not check for errno here since we want to continue
        // even if errno was set because we are still looking
        // for the timezone!

        // Read a number? If not, this might be a timezone name.
        if (newPosStr != dateString) {
            dateString = newPosStr;

            if (hour < 0 || hour > 23)
                return std::numeric_limits<double>::quiet_NaN();

            if (!*dateString)
                return std::numeric_limits<double>::quiet_NaN();

            // ':12:40 GMT'
            if (*dateString++ != ':')
                return std::numeric_limits<double>::quiet_NaN();

            if (!parseLong(dateString, &newPosStr, 10, &minute))
                return std::numeric_limits<double>::quiet_NaN();
            dateString = newPosStr;

            if (minute < 0 || minute > 59)
                return std::numeric_limits<double>::quiet_NaN();

            // ':40 GMT'
            if (*dateString && *dateString != ':' && !isASCIISpace(*dateString))
                return std::numeric_limits<double>::quiet_NaN();

            // seconds are optional in rfc822 + rfc2822
            if (*dateString ==':') {
                dateString++;

                if (!parseLong(dateString, &newPosStr, 10, &second))
                    return std::numeric_limits<double>::quiet_NaN();
                dateString = newPosStr;

                if (second < 0 || second > 59)
                    return std::numeric_limits<double>::quiet_NaN();
            }

            skipSpacesAndComments(dateString);

            // Optional 12-hour clock marker: 12am maps to hour 0, 1pm..11pm to 13..23.
            if (startsWithLettersIgnoringASCIICase(dateString, "am")) {
                if (hour > 12)
                    return std::numeric_limits<double>::quiet_NaN();
                if (hour == 12)
                    hour = 0;
                dateString += 2;
                skipSpacesAndComments(dateString);
            } else if (startsWithLettersIgnoringASCIICase(dateString, "pm")) {
                if (hour > 12)
                    return std::numeric_limits<double>::quiet_NaN();
                if (hour != 12)
                    hour += 12;
                dateString += 2;
                skipSpacesAndComments(dateString);
            }
        }
    }

    // The year may be after the time but before the time zone.
    if (isASCIIDigit(*dateString) && !year) {
        int result = 0;
        if (!parseInt(dateString, &newPosStr, 10, &result))
            return std::numeric_limits<double>::quiet_NaN();
        year = result;
        dateString = newPosStr;
        skipSpacesAndComments(dateString);
    }

    // Don't fail if the time zone is missing.
    // Some websites omit the time zone (4275206).
    if (*dateString) {
        if (startsWithLettersIgnoringASCIICase(dateString, "gmt") || startsWithLettersIgnoringASCIICase(dateString, "utc")) {
            dateString += 3;
            haveTZ = true;
        }

        if (*dateString == '+' || *dateString == '-') {
            int o;
            if (!parseInt(dateString, &newPosStr, 10, &o))
                return std::numeric_limits<double>::quiet_NaN();
            dateString = newPosStr;

            if (o < -9959 || o > 9959)
                return std::numeric_limits<double>::quiet_NaN();

            int sgn = (o < 0) ? -1 : 1;
            o = abs(o);
            if (*dateString != ':') {
                // Values of 24 and above are read as HHMM, smaller ones as whole hours.
                if (o >= 24)
                    offset = ((o / 100) * 60 + (o % 100)) * sgn;
                else
                    offset = o * 60 * sgn;
            } else { // GMT+05:00
                ++dateString; // skip the ':'
                int o2;
                if (!parseInt(dateString, &newPosStr, 10, &o2))
                    return std::numeric_limits<double>::quiet_NaN();
                dateString = newPosStr;
                offset = (o * 60 + o2) * sgn;
            }
            haveTZ = true;
        } else {
            for (auto& knownZone : knownZones) {
                // Since the passed-in length is used for both strings, the following checks that
                // dateString has the time zone name as a prefix, not that it is equal.
                auto length = strlen(knownZone.tzName);
                if (equalLettersIgnoringASCIICase(dateString, knownZone.tzName, length)) {
                    offset = knownZone.tzOffset;
                    dateString += length;
                    haveTZ = true;
                    break;
                }
            }
        }
    }

    skipSpacesAndComments(dateString);

    // Last chance for the year: it may follow the time zone.
    if (*dateString && !year) {
        int result = 0;
        if (!parseInt(dateString, &newPosStr, 10, &result))
            return std::numeric_limits<double>::quiet_NaN();
        year = result;
        dateString = newPosStr;
        skipSpacesAndComments(dateString);
    }

    // Trailing garbage
    if (*dateString)
        return std::numeric_limits<double>::quiet_NaN();

    // Y2K: Handle 2 digit years (00-49 become 2000-2049, 50-99 become 1950-1999).
    if (year) {
        int yearValue = year.value();
        if (yearValue >= 0 && yearValue < 100) {
            if (yearValue < 50)
                yearValue += 2000;
            else
                yearValue += 1900;
        }
        year = yearValue;
    } else {
        // We select 2000 as default value. This is because of the following reasons.
        // 1. Year 2000 was used for the initial value of the variable `year`. While it won't be posed to users in WebKit,
        //    V8 used this 2000 as its default value. (As of April 2017, V8 is using the year 2001 and Spider Monkey is
        //    not doing this kind of fallback.)
        // 2. It is a leap year. When using `new Date("Feb 29")`, we assume that people want to save month and day.
        //    A leap year can preserve user input if it is valid. If we used the current year instead, the current year
        //    may not be a leap year. In that case, `new Date("Feb 29").getMonth()` becomes 2 (March).
        year = 2000;
    }
    ASSERT(year);

    return ymdhmsToSeconds(year.value(), month + 1, day, hour, minute, second) * msPerSecond;
}
// A media-type character is anything except '/' and ASCII whitespace.
bool isMediaTypeCharacter(UChar c)
{
    if (c == '/')
        return false;
    return !isASCIISpace(c);
}
// Origins are separated by a comma or by ASCII whitespace.
static bool isOriginSeparator(UChar ch)
{
    if (ch == ',')
        return true;
    return isASCIISpace(ch);
}
// Scans the next token starting at m_ptr into |token| and returns its type.
//
// Leading ASCII whitespace is skipped. |token.start| / |token.end| delimit
// the punctuation and keyword tokens produced here; strings and numbers are
// delegated to lexString<>() and lexNumber(). Anything unrecognized leaves
// |token.type| as TokError and returns TokError without consuming input.
// For the tokens produced in this function the returned value equals
// |token.type|.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
    case '[':
        token.type = TokLBracket;
        token.end = ++m_ptr;
        return TokLBracket;
    case ']':
        token.type = TokRBracket;
        token.end = ++m_ptr;
        return TokRBracket;
    case '(':
        token.type = TokLParen;
        token.end = ++m_ptr;
        // Return the paren type (not TokLBracket) so that callers which look
        // at the return value cannot mistake '(' for '['.
        return TokLParen;
    case ')':
        token.type = TokRParen;
        token.end = ++m_ptr;
        // Likewise: ')' must not be reported as ']'.
        return TokRParen;
    case '{':
        token.type = TokLBrace;
        token.end = ++m_ptr;
        return TokLBrace;
    case '}':
        token.type = TokRBrace;
        token.end = ++m_ptr;
        return TokRBrace;
    case ',':
        token.type = TokComma;
        token.end = ++m_ptr;
        return TokComma;
    case ':':
        token.type = TokColon;
        token.end = ++m_ptr;
        return TokColon;
    case '"':
        if (m_mode == StrictJSON)
            return lexString<StrictJSON>(token);
        return lexString<NonStrictJSON>(token);
    case 't':
        // The length check keeps the keyword comparison inside the input.
        if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
            m_ptr += 4;
            token.type = TokTrue;
            token.end = m_ptr;
            return TokTrue;
        }
        break;
    case 'f':
        if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
            m_ptr += 5;
            token.type = TokFalse;
            token.end = m_ptr;
            return TokFalse;
        }
        break;
    case 'n':
        if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
            m_ptr += 4;
            token.type = TokNull;
            token.end = m_ptr;
            return TokNull;
        }
        break;
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return lexNumber(token);
    }
    return TokError;
}
// Converts the string to a double.
//
// A one-character string is handled directly: a digit yields its value, an
// ASCII space yields 0 when |tolerateEmptyString| is set, anything else NaN.
// Longer strings are converted via UTF8String() (NaN if that is null), then
// parsed as optional whitespace, a "0x"/"0X" hex literal or a decimal number
// (including "Infinity" with optional sign), and optional trailing
// whitespace. An all-whitespace string yields 0.0 or NaN depending on
// |tolerateEmptyString|. Any other trailing characters yield NaN unless
// |tolerateTrailingJunk| is set.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
    // after the number, even if it contains invalid UTF-16 sequences. So we
    // shouldn't use the UTF8String function, which returns null when it
    // encounters invalid UTF-16. Further, we have no need to convert the
    // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
    // unnecessary work.
    CString s = UTF8String();
    if (s.isNull())
        return NaN;
    const char* c = s.data();

    // skip leading white space
    while (isASCIISpace(*c))
        c++;

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

    double d;

    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
        // Step onto the 'x'; the pre-increment in the loop then lands on the first digit.
        c++;
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                // Clearing bit 0x20 folds 'a'..'f' onto 'A'..'F'.
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        // Large values are recomputed from the digit text by parseIntOverflow().
        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
        d = WTI::strtod(c, &end);
        // Accept strtod's result only if it consumed something (or produced a
        // non-zero value) and the value is finite.
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than the ECMA spec requires:
            // it converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                // Infinity that did not come from an "inf..." spelling: keep strtod's result.
                c = end;
            else
                return NaN;
        }
    }

    // allow trailing white space
    while (isASCIISpace(*c))
        c++;
    // don't allow anything after - unless tolerant=true
    // FIXME: If string contains a U+0000 character, then this check is incorrect.
    if (!tolerateTrailingJunk && *c != '\0')
        d = NaN;

    return d;
}
// Converts the string to a double.
//
// A one-character string is handled directly: a digit yields its value, an
// ASCII space yields 0 when |tolerateEmptyString| is set, anything else NaN.
// Longer strings are copied out with getCString() (NaN if that fails), then
// parsed as optional whitespace, a "0x"/"0X" hex literal or a decimal number
// (including "Infinity" with optional sign), and optional trailing
// whitespace. An all-whitespace string yields 0.0 or NaN depending on
// |tolerateEmptyString|. Any other trailing characters yield NaN unless
// |tolerateTrailingJunk| is set.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
    // after the number, so this is too strict a check.
    CStringBuffer s;
    if (!getCString(s))
        return NaN;
    const char* c = s.data();

    // skip leading white space
    while (isASCIISpace(*c))
        c++;

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

    double d;

    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
        // Step onto the 'x'; the pre-increment in the loop then lands on the first digit.
        c++;
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                // Clearing bit 0x20 folds 'a'..'f' onto 'A'..'F'.
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        // Large values are recomputed from the digit text by parseIntOverflow().
        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
        d = WTF::strtod(c, &end);
        // Accept strtod's result only if it consumed something (or produced a
        // non-zero value) and the value is finite.
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than JavaScript in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                // Infinity that did not come from an "inf..." spelling: keep strtod's result.
                c = end;
            else
                return NaN;
        }
    }

    // allow trailing white space
    while (isASCIISpace(*c))
        c++;
    // don't allow anything after - unless tolerant=true
    if (!tolerateTrailingJunk && *c != '\0')
        d = NaN;

    return d;
}
// Splits |optionsStr| into whitespace-separated "name=value" entries and
// applies each one with setOption().
//
// The string is duplicated and tokenized in place: every entry is
// NUL-terminated inside the copy, a leading "--" is skipped, and a value
// wrapped in double quotes has the quotes stripped. Returns false as soon as
// an entry lacks '=' or a closing quote (nothing is applied in that case);
// otherwise returns whether every setOption() call succeeded.
//
// NOTE(review): optionsStrCopy is never freed in this function. Whether
// setOption() keeps pointers into it is not visible here -- confirm before
// adding a free.
bool Options::setOptions(const char* optionsStr)
{
    Vector<char*> options;

    size_t length = strlen(optionsStr);
    char* optionsStrCopy = WTF::fastStrDup(optionsStr);
    char* end = optionsStrCopy + length;
    char* p = optionsStrCopy;

    while (p < end) {
        // Skip white space.
        while (p < end && isASCIISpace(*p))
            p++;
        if (p == end)
            break;

        char* optionStart = p;
        p = strstr(p, "=");
        if (!p) {
            // NOTE(review): %p prints the address held in optionStart, not the option text.
            dataLogF("'=' not found in option string: %p\n", optionStart);
            return false;
        }
        p++;

        char* valueBegin = p;
        bool hasStringValue = false;
        const int minStringLength = 2; // The min is an empty string i.e. 2 double quotes.
        if ((p + minStringLength < end) && (*p == '"')) {
            p = strstr(p + 1, "\"");
            if (!p) {
                dataLogF("Missing trailing '\"' in option string: %p\n", optionStart);
                return false; // End of string not found.
            }
            hasStringValue = true;
        }

        // Find next white space.
        while (p < end && !isASCIISpace(*p))
            p++;
        if (!p)
            p = end; // No more " " separator. Hence, this is the last arg.

        // If we have a well-formed string value, strip the quotes.
        if (hasStringValue) {
            char* valueEnd = p;
            ASSERT((*valueBegin == '"') && ((valueEnd - valueBegin) >= minStringLength) && (valueEnd[-1] == '"'));
            // Shift the quoted content left over the opening quote, then
            // terminate it where the shifted content ends.
            memmove(valueBegin, valueBegin + 1, valueEnd - valueBegin - minStringLength);
            valueEnd[-minStringLength] = '\0';
        }

        // Strip leading -- if present.
        if ((p - optionStart > 2) && optionStart[0] == '-' && optionStart[1] == '-')
            optionStart += 2;

        // Terminate this entry in place and step past the separator.
        *p++ = '\0';
        options.append(optionStart);
    }

    bool success = true;
    for (auto& option : options) {
        bool optionSuccess = setOption(option);
        if (!optionSuccess) {
            dataLogF("Failed to set option : %s\n", option);
            success = false;
        }
    }

    dumpOptionsIfNeeded();
    return success;
}
// Any character that is not ASCII whitespace may appear in a source expression.
static bool isSourceCharacter(UChar c)
{
    const bool isWhitespace = isASCIISpace(c);
    return !isWhitespace;
}
// Negation of isASCIISpace(), usable as a character predicate.
bool isNotASCIISpace(UChar c)
{
    if (isASCIISpace(c))
        return false;
    return true;
}
// Odd case where 'exec' is allowed to be 0, to accomodate a caller in WebCore. static double parseDateFromNullTerminatedCharacters(const char* dateString, bool& haveTZ, int& offset) { haveTZ = false; offset = 0; // This parses a date in the form: // Tuesday, 09-Nov-99 23:12:40 GMT // or // Sat, 01-Jan-2000 08:00:00 GMT // or // Sat, 01 Jan 2000 08:00:00 GMT // or // 01 Jan 99 22:00 +0100 (exceptions in rfc822/rfc2822) // ### non RFC formats, added for Javascript: // [Wednesday] January 09 1999 23:12:40 GMT // [Wednesday] January 09 23:12:40 GMT 1999 // // We ignore the weekday. // Skip leading space skipSpacesAndComments(dateString); long month = -1; const char *wordStart = dateString; // Check contents of first words if not number while (*dateString && !isASCIIDigit(*dateString)) { if (isASCIISpace(*dateString) || *dateString == '(') { if (dateString - wordStart >= 3) month = findMonth(wordStart); skipSpacesAndComments(dateString); wordStart = dateString; } else dateString++; } // Missing delimiter between month and day (like "January29")? if (month == -1 && wordStart != dateString) month = findMonth(wordStart); skipSpacesAndComments(dateString); if (!*dateString) return NaN; // ' 09-Nov-99 23:12:40 GMT' char* newPosStr; long day; if (!parseLong(dateString, &newPosStr, 10, &day)) return NaN; dateString = newPosStr; if (!*dateString) return NaN; if (day < 0) return NaN; long year = 0; if (day > 31) { // ### where is the boundary and what happens below? if (*dateString != '/') return NaN; // looks like a YYYY/MM/DD date if (!*++dateString) return NaN; year = day; if (!parseLong(dateString, &newPosStr, 10, &month)) return NaN; month -= 1; dateString = newPosStr; if (*dateString++ != '/' || !*dateString) return NaN; if (!parseLong(dateString, &newPosStr, 10, &day)) return NaN; dateString = newPosStr; } else if (*dateString == '/' && month == -1) { dateString++; // This looks like a MM/DD/YYYY date, not an RFC date. 
month = day - 1; // 0-based if (!parseLong(dateString, &newPosStr, 10, &day)) return NaN; if (day < 1 || day > 31) return NaN; dateString = newPosStr; if (*dateString == '/') dateString++; if (!*dateString) return NaN; } else { if (*dateString == '-') dateString++; skipSpacesAndComments(dateString); if (*dateString == ',') dateString++; if (month == -1) { // not found yet month = findMonth(dateString); if (month == -1) return NaN; while (*dateString && *dateString != '-' && *dateString != ',' && !isASCIISpace(*dateString)) dateString++; if (!*dateString) return NaN; // '-99 23:12:40 GMT' if (*dateString != '-' && *dateString != '/' && *dateString != ',' && !isASCIISpace(*dateString)) return NaN; dateString++; } } if (month < 0 || month > 11) return NaN; // '99 23:12:40 GMT' if (year <= 0 && *dateString) { if (!parseLong(dateString, &newPosStr, 10, &year)) return NaN; } // Don't fail if the time is missing. long hour = 0; long minute = 0; long second = 0; if (!*newPosStr) dateString = newPosStr; else { // ' 23:12:40 GMT' if (!(isASCIISpace(*newPosStr) || *newPosStr == ',')) { if (*newPosStr != ':') return NaN; // There was no year; the number was the hour. year = -1; } else { // in the normal case (we parsed the year), advance to the next number dateString = ++newPosStr; skipSpacesAndComments(dateString); } parseLong(dateString, &newPosStr, 10, &hour); // Do not check for errno here since we want to continue // even if errno was set becasue we are still looking // for the timezone! // Read a number? If not, this might be a timezone name. 
if (newPosStr != dateString) { dateString = newPosStr; if (hour < 0 || hour > 23) return NaN; if (!*dateString) return NaN; // ':12:40 GMT' if (*dateString++ != ':') return NaN; if (!parseLong(dateString, &newPosStr, 10, &minute)) return NaN; dateString = newPosStr; if (minute < 0 || minute > 59) return NaN; // ':40 GMT' if (*dateString && *dateString != ':' && !isASCIISpace(*dateString)) return NaN; // seconds are optional in rfc822 + rfc2822 if (*dateString ==':') { dateString++; if (!parseLong(dateString, &newPosStr, 10, &second)) return NaN; dateString = newPosStr; if (second < 0 || second > 59) return NaN; } skipSpacesAndComments(dateString); if (strncasecmp(dateString, "AM", 2) == 0) { if (hour > 12) return NaN; if (hour == 12) hour = 0; dateString += 2; skipSpacesAndComments(dateString); } else if (strncasecmp(dateString, "PM", 2) == 0) { if (hour > 12) return NaN; if (hour != 12) hour += 12; dateString += 2; skipSpacesAndComments(dateString); } } } // Don't fail if the time zone is missing. // Some websites omit the time zone (4275206). if (*dateString) { if (strncasecmp(dateString, "GMT", 3) == 0 || strncasecmp(dateString, "UTC", 3) == 0) { dateString += 3; haveTZ = true; } if (*dateString == '+' || *dateString == '-') { long o; if (!parseLong(dateString, &newPosStr, 10, &o)) return NaN; dateString = newPosStr; if (o < -9959 || o > 9959) return NaN; int sgn = (o < 0) ? 
-1 : 1; o = labs(o); if (*dateString != ':') { offset = ((o / 100) * 60 + (o % 100)) * sgn; } else { // GMT+05:00 long o2; if (!parseLong(dateString, &newPosStr, 10, &o2)) return NaN; dateString = newPosStr; offset = (o * 60 + o2) * sgn; } haveTZ = true; } else { for (int i = 0; i < int(sizeof(known_zones) / sizeof(KnownZone)); i++) { if (0 == strncasecmp(dateString, known_zones[i].tzName, strlen(known_zones[i].tzName))) { offset = known_zones[i].tzOffset; dateString += strlen(known_zones[i].tzName); haveTZ = true; break; } } } } skipSpacesAndComments(dateString); if (*dateString && year == -1) { if (!parseLong(dateString, &newPosStr, 10, &year)) return NaN; dateString = newPosStr; } skipSpacesAndComments(dateString); // Trailing garbage if (*dateString) return NaN; // Y2K: Handle 2 digit years. if (year >= 0 && year < 100) { if (year < 50) year += 2000; else year += 1900; } return ymdhmsToSeconds(year, month + 1, day, hour, minute, second) * msPerSecond; }
// Scans the next token starting at m_ptr into |token| and returns its type.
//
// Leading ASCII whitespace is skipped. |token.start| / |token.end| delimit
// the punctuation tokens produced here; strings and numbers are delegated to
// lexString() and lexNumber(). Anything unrecognized leaves |token.type| as
// TokError and returns TokError without consuming input. For the tokens
// produced in this function the returned value equals |token.type|.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
    case '[':
        token.type = TokLBracket;
        token.end = ++m_ptr;
        return TokLBracket;
    case ']':
        token.type = TokRBracket;
        token.end = ++m_ptr;
        return TokRBracket;
    case '(':
        token.type = TokLParen;
        token.end = ++m_ptr;
        // Return the paren type (not TokLBracket) so that callers which look
        // at the return value cannot mistake '(' for '['.
        return TokLParen;
    case ')':
        token.type = TokRParen;
        token.end = ++m_ptr;
        // Likewise: ')' must not be reported as ']'.
        return TokRParen;
    case '{':
        token.type = TokLBrace;
        token.end = ++m_ptr;
        return TokLBrace;
    case '}':
        token.type = TokRBrace;
        token.end = ++m_ptr;
        return TokRBrace;
    case ',':
        token.type = TokComma;
        token.end = ++m_ptr;
        return TokComma;
    case ':':
        token.type = TokColon;
        token.end = ++m_ptr;
        return TokColon;
    case '"':
    case '\'':
        return lexString(token);

    // Numbers are trickier so we only allow the most basic form, basically
    //  * [1-9][0-9]*(\.[0-9]*)?
    //  * \.[0-9]*
    //  * 0(\.[0-9]*)?
    case '0':
        // If a number starts with 0 it's expected to be octal. It seems silly
        // to attempt to handle this case, so we abort
        if (m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1]))
            return TokError;
        return lexNumber(token);
    case '.':
        // If a number starts with a '.' it must be followed by a digit
        if (!(m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1])))
            return TokError;
        return lexNumber(token);
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return lexNumber(token);
    }
    return TokError;
}
// Directive values may contain whitespace and VCHAR (0x21 through 0x7e).
bool isCSPDirectiveValueCharacter(UChar c)
{
    // VCHAR
    if (c >= 0x21 && c <= 0x7e)
        return true;
    // Whitespace
    return isASCIISpace(c);
}