Beispiel #1
0
// Returns the type of the next key-path token starting at m_ptr.
// Leading ASCII whitespace is skipped first. The single-character tokens
// '[', ']' and '.' are consumed here; anything starting with a decimal digit
// is handed to lexNumber(), and everything else to lexIdentifier(), both of
// which receive |element|.
IDBKeyPathLexer::TokenType IDBKeyPathLexer::lex(IDBKeyPathElement& element)
{
    for (; m_ptr < m_end; ++m_ptr) {
        if (!isASCIISpace(*m_ptr))
            break;
    }

    // Nothing but whitespace was left.
    if (m_ptr >= m_end)
        return TokenEnd;

    ASSERT(m_ptr < m_end);

    if (*m_ptr == '[') {
        ++m_ptr;
        return TokenLeftBracket;
    }
    if (*m_ptr == ']') {
        ++m_ptr;
        return TokenRightBracket;
    }
    if (*m_ptr == '.') {
        ++m_ptr;
        return TokenDot;
    }

    // The sub-lexers start at the current character; it is not consumed here.
    if (*m_ptr >= '0' && *m_ptr <= '9')
        return lexNumber(element);
    return lexIdentifier(element);
}
// source-list       = *WSP [ source *( 1*WSP source ) *WSP ]
//                   / *WSP "'none'" *WSP
//
void CSPSourceList::parse(const UChar* begin, const UChar* end)
{
    const UChar* position = begin;

    bool isFirstSourceInList = true;
    while (position < end) {
        skipWhile<isASCIISpace>(position, end);
        const UChar* beginSource = position;
        skipWhile<isSourceCharacter>(position, end);

        if (isFirstSourceInList && equalIgnoringCase("'none'", beginSource, position - beginSource))
            return; // We represent 'none' as an empty m_list.
        isFirstSourceInList = false;

        String scheme, host;
        int port = 0;
        bool hostHasWildcard = false;
        bool portHasWildcard = false;

        if (parseSource(beginSource, position, scheme, host, port, hostHasWildcard, portHasWildcard)) {
            if (scheme.isEmpty())
                scheme = m_origin->protocol();
            m_list.append(CSPSource(scheme, host, port, hostHasWildcard, portHasWildcard));
        }

        ASSERT(position == end || isASCIISpace(*position));
     }
}
// Extracts the suborigin name from |policy|.
// The name must be one run of ASCII alphanumeric characters, optionally
// surrounded by ASCII whitespace. On any violation the problem is reported via
// m_policy->reportInvalidSuboriginFlags() and a null String is returned.
String CSPDirectiveList::parseSuboriginName(const String& policy)
{
    // Work on a UChar copy of the policy so it can be scanned with pointers.
    Vector<UChar> buffer;
    policy.appendTo(buffer);

    const UChar* position = buffer.data();
    const UChar* end = position + buffer.size();

    // Leading whitespace is ignored; an all-whitespace (or empty) policy has no name.
    skipWhile<UChar, isASCIISpace>(position, end);
    if (position == end) {
        m_policy->reportInvalidSuboriginFlags("No suborigin name specified.");
        return String();
    }

    // The name itself: alphanumerics only, ending at whitespace or at the end.
    const UChar* nameStart = position;
    skipWhile<UChar, isASCIIAlphanumeric>(position, end);
    if (position != end && !isASCIISpace(*position)) {
        m_policy->reportInvalidSuboriginFlags("Invalid character \'" + String(position, 1) + "\' in suborigin.");
        return String();
    }
    size_t nameLength = position - nameStart;

    // Only trailing whitespace may follow; a second word is an error.
    skipWhile<UChar, isASCIISpace>(position, end);
    if (position != end) {
        m_policy->reportInvalidSuboriginFlags("Whitespace is not allowed in suborigin names.");
        return String();
    }

    return String(nameStart, nameLength);
}
void MediaListDirective::parse(const UChar* begin, const UChar* end)
{
    const UChar* position = begin;

    // 'plugin-types ____;' OR 'plugin-types;'
    if (position == end) {
        policy()->reportInvalidPluginTypes(String());
        return;
    }

    while (position < end) {
        // _____ OR _____mime1/mime1
        // ^        ^
        skipWhile<UChar, isASCIISpace>(position, end);
        if (position == end)
            return;

        // mime1/mime1 mime2/mime2
        // ^
        begin = position;
        if (!skipExactly<UChar, isMediaTypeCharacter>(position, end)) {
            skipWhile<UChar, isNotASCIISpace>(position, end);
            policy()->reportInvalidPluginTypes(String(begin, position - begin));
            continue;
        }
        skipWhile<UChar, isMediaTypeCharacter>(position, end);

        // mime1/mime1 mime2/mime2
        //      ^
        if (!skipExactly<UChar>(position, end, '/')) {
            skipWhile<UChar, isNotASCIISpace>(position, end);
            policy()->reportInvalidPluginTypes(String(begin, position - begin));
            continue;
        }

        // mime1/mime1 mime2/mime2
        //       ^
        if (!skipExactly<UChar, isMediaTypeCharacter>(position, end)) {
            skipWhile<UChar, isNotASCIISpace>(position, end);
            policy()->reportInvalidPluginTypes(String(begin, position - begin));
            continue;
        }
        skipWhile<UChar, isMediaTypeCharacter>(position, end);

        // mime1/mime1 mime2/mime2 OR mime1/mime1  OR mime1/mime1/error
        //            ^                          ^               ^
        if (position < end && isNotASCIISpace(*position)) {
            skipWhile<UChar, isNotASCIISpace>(position, end);
            policy()->reportInvalidPluginTypes(String(begin, position - begin));
            continue;
        }
        m_pluginTypes.add(String(begin, position - begin));

        ASSERT(position == end || isASCIISpace(*position));
    }
}
Beispiel #5
0
// Returns true when every one of the m_length characters in m_data is ASCII
// whitespace according to isASCIISpace(); an empty string therefore yields true.
bool StringImpl::containsOnlyWhitespace()
{
    // FIXME: The definition of whitespace here includes a number of characters
    // that are not whitespace from the point of view of RenderText; I wonder if
    // that's a problem in practice.
    unsigned index = 0;
    while (index < m_length && isASCIISpace(m_data[index]))
        ++index;

    // The scan only reaches the end when no non-space character stopped it.
    return index == m_length;
}
// Reads the next whitespace-delimited token from |file|.
//
// Leading ASCII whitespace is skipped; reading stops at the first whitespace
// character after the token has started, at EOF, or when the buffer is full.
// Returns String() for a null |file|, and a String built from an empty buffer
// when no token characters were read.
//
// At most maxBuffer - 1 characters are stored so that the zero-initialised
// buffer always keeps a terminating NUL. Previously a token of maxBuffer or
// more characters filled the whole array, leaving String(buffer) to read past
// its end. Characters beyond the limit stay in the stream for the next call.
static inline String nextToken(FILE* file)
{
    ASSERT(file);
    if (!file)
        return String();

    char buffer[maxBuffer] = {0, };
    unsigned int index = 0;
    // "index + 1 < maxBuffer" reserves the last slot for the NUL terminator.
    while (index + 1 < maxBuffer) {
        int ch = fgetc(file);
        if (ch == EOF || (isASCIISpace(ch) && index)) // Break on non-initial ASCII space.
            break;
        if (!isASCIISpace(ch)) {
            buffer[index] = static_cast<char>(ch);
            index++;
        }
    }

    return String(buffer);
}
// Reads the next whitespace-delimited token from |file|.
//
// Leading ASCII whitespace is skipped; reading stops at the first whitespace
// character after the token has started, at EOF, or when the buffer is full.
// Returns String() for a null |file|, and a String built from an empty buffer
// when no token characters were read.
//
// At most bufferSize - 1 characters are stored so that the zero-initialised
// buffer always keeps a terminating NUL. Previously a token of bufferSize or
// more characters filled the whole array, leaving String(buffer) to read past
// its end. Characters beyond the limit stay in the stream for the next call.
static inline String nextToken(FILE* file)
{
    if (!file)
        return String();

    static const unsigned bufferSize = 128;
    char buffer[bufferSize] = {0, };
    unsigned index = 0;
    // "index + 1 < bufferSize" reserves the last slot for the NUL terminator.
    while (index + 1 < bufferSize) {
        int ch = fgetc(file);
        if (ch == EOF || (isASCIISpace(ch) && index)) // Break on non-initial ASCII space.
            break;
        if (!isASCIISpace(ch)) {
            buffer[index] = static_cast<char>(ch);
            index++;
        }
    }

    return String(buffer);
}
// Reports whether any of the first |length| characters of |str| is '}' or
// ASCII whitespace.
static bool hasDisallowedCharacters(const char* str, size_t length)
{
    const char* const stop = str + length;
    for (const char* cursor = str; cursor != stop; ++cursor) {
        // '{' is also disallowed, but we don't need to check for it because
        // parseClause() searches for '{' as the end of the start delimiter.
        // As a result, the parsed delimiter string will never include '{'.
        if (*cursor == '}' || isASCIISpace(*cursor))
            return true;
    }
    return false;
}
// Returns |string| without its leading whitespace. A leading character is
// dropped when it is noBreakSpace, an ASCII space (for code units <= 0x7F),
// or a non-ASCII character whose direction() is WhiteSpaceNeutral.
static String stripLeadingWhiteSpace(const String& string)
{
    const unsigned length = string.length();

    unsigned start = 0;
    while (start < length) {
        if (string[start] != noBreakSpace) {
            if (string[start] <= 0x7F) {
                // ASCII range: only isASCIISpace() characters are stripped.
                if (!isASCIISpace(string[start]))
                    break;
            } else if (direction(string[start]) != WhiteSpaceNeutral) {
                // Non-ASCII: stop at the first character that is not whitespace-neutral.
                break;
            }
        }
        ++start;
    }

    return string.substring(start, length - start);
}
Beispiel #10
0
// Advances |s| past ASCII whitespace and parenthesised comments.
// Parentheses may nest; everything inside them is skipped. Scanning stops at
// the terminating NUL or at the first non-space character found outside any
// parentheses (an unmatched ')' counts as such a character).
inline static void skipSpacesAndComments(const char*& s)
{
    for (int depth = 0; *s; ++s) {
        const char current = *s;
        if (isASCIISpace(current))
            continue;
        if (current == '(') {
            ++depth;
            continue;
        }
        if (current == ')' && depth > 0) {
            --depth;
            continue;
        }
        // Ordinary character: significant only outside of a comment.
        if (!depth)
            break;
    }
}
// Parses a double from the first |length| characters of |data|, ignoring
// leading ASCII whitespace. |parsedLength| receives the number of characters
// consumed including that whitespace, or 0 when nothing could be parsed (in
// which case 0.0 is returned). If |ok| is non-null it is set to whether the
// parse succeeded under |policy|: with AllowTrailingJunk any parsed prefix is
// fine, otherwise the whole input must have been consumed.
// NOTE(review): CharType and policy are presumably template parameters declared
// on a line that is not visible here -- confirm.
static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength)
{
    size_t skipped = 0;
    for (; skipped < length; ++skipped) {
        if (!isASCIISpace(data[skipped]))
            break;
    }

    const double value = parseDouble(data + skipped, length - skipped, parsedLength);
    if (parsedLength) {
        // Report the consumed length relative to the start of |data|.
        parsedLength += skipped;
        if (ok)
            *ok = policy == AllowTrailingJunk || parsedLength == length;
        return value;
    }

    if (ok)
        *ok = false;
    return 0.0;
}
Beispiel #12
0
bool DOMImplementation::isJSONMIMEType(const String& mimeType)
{
    if (mimeType.startsWith("application/json", false))
        return true;
    if (mimeType.startsWith("application/", false)) {
        size_t subtype = mimeType.find("+json", 12, false);
        if (subtype != kNotFound) {
            // Just check that a parameter wasn't matched.
            size_t parameterMarker = mimeType.find(";");
            if (parameterMarker == kNotFound) {
                unsigned endSubtype = static_cast<unsigned>(subtype) + 5;
                return endSubtype == mimeType.length() || isASCIISpace(mimeType[endSubtype]);
            }
            return parameterMarker > subtype;
        }
    }
    return false;
}
// source-list       = *WSP [ source *( 1*WSP source ) *WSP ]
//                   / *WSP "'none'" *WSP
//
void ContentSecurityPolicySourceList::parse(const UChar* begin, const UChar* end)
{
    const UChar* position = begin;

    while (position < end) {
        skipWhile<UChar, isASCIISpace>(position, end);
        if (position == end)
            return;

        const UChar* beginSource = position;
        skipWhile<UChar, isSourceCharacter>(position, end);

        String scheme, host, path;
        int port = 0;
        bool hostHasWildcard = false;
        bool portHasWildcard = false;

        if (parseNonceSource(beginSource, position))
            continue;

        if (parseHashSource(beginSource, position))
            continue;

        if (parseSource(beginSource, position, scheme, host, port, path, hostHasWildcard, portHasWildcard)) {
            // Wildcard hosts and keyword sources ('self', 'unsafe-inline',
            // etc.) aren't stored in m_list, but as attributes on the source
            // list itself.
            if (scheme.isEmpty() && host.isEmpty())
                continue;
            if (isCSPDirectiveName(host))
                m_policy.reportDirectiveAsSourceExpression(m_directiveName, host);
            m_list.append(ContentSecurityPolicySource(m_policy, scheme, host, port, path, hostHasWildcard, portHasWildcard));
        } else
            m_policy.reportInvalidSourceExpression(m_directiveName, String(beginSource, position - beginSource));

        ASSERT(position == end || isASCIISpace(*position));
    }
}
Beispiel #14
0
// source-list       = *WSP [ source *( 1*WSP source ) *WSP ]
//                   / *WSP "'none'" *WSP
//
void CSPSourceList::parse(const UChar* begin, const UChar* end)
{
    // We represent 'none' as an empty m_list.
    if (isSourceListNone(begin, end))
        return;

    const UChar* position = begin;
    while (position < end) {
        skipWhile<UChar, isASCIISpace>(position, end);
        if (position == end)
            return;

        const UChar* beginSource = position;
        skipWhile<UChar, isSourceCharacter>(position, end);

        String scheme, host, path;
        int port = 0;
        CSPSource::WildcardDisposition hostWildcard = CSPSource::NoWildcard;
        CSPSource::WildcardDisposition portWildcard = CSPSource::NoWildcard;

        if (parseSource(beginSource, position, scheme, host, port, path, hostWildcard, portWildcard)) {
            // Wildcard hosts and keyword sources ('self', 'unsafe-inline',
            // etc.) aren't stored in m_list, but as attributes on the source
            // list itself.
            if (scheme.isEmpty() && host.isEmpty())
                continue;
            if (m_policy->isDirectiveName(host))
                m_policy->reportDirectiveAsSourceExpression(m_directiveName, host);
            m_list.append(CSPSource(m_policy, scheme, host, port, path, hostWildcard, portWildcard));
        } else {
            m_policy->reportInvalidSourceExpression(m_directiveName, String(beginSource, position - beginSource));
        }

        ASSERT(position == end || isASCIISpace(*position));
    }
}
Beispiel #15
0
// Parses a free-form date string and returns the result of
// ymdhmsToSeconds(...) * msPerSecond, or a quiet NaN when the string cannot be
// parsed.
//
// dateString: NUL-terminated text to parse; leading spaces and parenthesised
//             comments are skipped via skipSpacesAndComments().
// haveTZ:     out; reset to false, then set to true when a "GMT"/"UTC" prefix,
//             a numeric +/- offset, or a name from knownZones is recognised.
// offset:     out; reset to 0, then set from the recognised zone. Numeric
//             offsets are stored as hours * 60 + minutes with the sign applied;
//             named zones use knownZone.tzOffset (units not visible here --
//             presumably minutes as well; TODO confirm).
//
// A missing year defaults to 2000 and two-digit years are mapped to 20xx
// (below 50) or 19xx (50 and above); a missing time defaults to 00:00:00.
//
// NOTE(review): an earlier comment here described an 'exec' parameter that may
// be 0 to accommodate a caller in WebCore; this signature has no such
// parameter, so that remark appears to be stale.
double parseDateFromNullTerminatedCharacters(const char* dateString, bool& haveTZ, int& offset)
{
    haveTZ = false;
    offset = 0;

    // This parses a date in the form:
    //     Tuesday, 09-Nov-99 23:12:40 GMT
    // or
    //     Sat, 01-Jan-2000 08:00:00 GMT
    // or
    //     Sat, 01 Jan 2000 08:00:00 GMT
    // or
    //     01 Jan 99 22:00 +0100    (exceptions in rfc822/rfc2822)
    // ### non RFC formats, added for Javascript:
    //     [Wednesday] January 09 1999 23:12:40 GMT
    //     [Wednesday] January 09 23:12:40 GMT 1999
    //
    // We ignore the weekday.
     
    // Skip leading space
    skipSpacesAndComments(dateString);

    // month stays -1 until a month has been recognised (0-based once set).
    long month = -1;
    const char *wordStart = dateString;
    // Check contents of first words if not number
    while (*dateString && !isASCIIDigit(*dateString)) {
        if (isASCIISpace(*dateString) || *dateString == '(') {
            // Only words of at least three characters are tried as a month name.
            if (dateString - wordStart >= 3)
                month = findMonth(wordStart);
            skipSpacesAndComments(dateString);
            wordStart = dateString;
        } else
           dateString++;
    }

    // Missing delimiter between month and day (like "January29")?
    if (month == -1 && wordStart != dateString)
        month = findMonth(wordStart);

    skipSpacesAndComments(dateString);

    if (!*dateString)
        return std::numeric_limits<double>::quiet_NaN();

    // ' 09-Nov-99 23:12:40 GMT'
    // The first number is provisionally the day; the branches below reinterpret
    // it as a year (YYYY/MM/DD) or a month (MM/DD/YYYY) where appropriate.
    char* newPosStr;
    long day;
    if (!parseLong(dateString, &newPosStr, 10, &day))
        return std::numeric_limits<double>::quiet_NaN();
    dateString = newPosStr;

    if (day < 0)
        return std::numeric_limits<double>::quiet_NaN();

    // Empty until a year has actually been parsed.
    std::optional<int> year;
    if (day > 31) {
        // ### where is the boundary and what happens below?
        if (*dateString != '/')
            return std::numeric_limits<double>::quiet_NaN();
        // looks like a YYYY/MM/DD date
        if (!*++dateString)
            return std::numeric_limits<double>::quiet_NaN();
        // Reject values that cannot be stored in the int-typed year.
        if (day <= std::numeric_limits<int>::min() || day >= std::numeric_limits<int>::max())
            return std::numeric_limits<double>::quiet_NaN();
        year = static_cast<int>(day);
        if (!parseLong(dateString, &newPosStr, 10, &month))
            return std::numeric_limits<double>::quiet_NaN();
        month -= 1;
        dateString = newPosStr;
        if (*dateString++ != '/' || !*dateString)
            return std::numeric_limits<double>::quiet_NaN();
        if (!parseLong(dateString, &newPosStr, 10, &day))
            return std::numeric_limits<double>::quiet_NaN();
        dateString = newPosStr;
    } else if (*dateString == '/' && month == -1) {
        dateString++;
        // This looks like a MM/DD/YYYY date, not an RFC date.
        month = day - 1; // 0-based
        if (!parseLong(dateString, &newPosStr, 10, &day))
            return std::numeric_limits<double>::quiet_NaN();
        if (day < 1 || day > 31)
            return std::numeric_limits<double>::quiet_NaN();
        dateString = newPosStr;
        if (*dateString == '/')
            dateString++;
        if (!*dateString)
            return std::numeric_limits<double>::quiet_NaN();
     } else {
        if (*dateString == '-')
            dateString++;

        skipSpacesAndComments(dateString);

        if (*dateString == ',')
            dateString++;

        if (month == -1) { // not found yet
            month = findMonth(dateString);
            if (month == -1)
                return std::numeric_limits<double>::quiet_NaN();

            // Skip over the rest of the month name.
            while (*dateString && *dateString != '-' && *dateString != ',' && !isASCIISpace(*dateString))
                dateString++;

            if (!*dateString)
                return std::numeric_limits<double>::quiet_NaN();

            // '-99 23:12:40 GMT'
            if (*dateString != '-' && *dateString != '/' && *dateString != ',' && !isASCIISpace(*dateString))
                return std::numeric_limits<double>::quiet_NaN();
            dateString++;
        }
    }

    if (month < 0 || month > 11)
        return std::numeric_limits<double>::quiet_NaN();

    // '99 23:12:40 GMT'
    // Note that dateString is not advanced here; newPosStr marks the end of the
    // number so the code below can decide whether it was a year or an hour.
    if (*dateString && !year) {
        int result = 0;
        if (!parseInt(dateString, &newPosStr, 10, &result))
            return std::numeric_limits<double>::quiet_NaN();
        year = result;
    }

    // Don't fail if the time is missing.
    long hour = 0;
    long minute = 0;
    long second = 0;
    if (!*newPosStr)
        dateString = newPosStr;
    else {
        // ' 23:12:40 GMT'
        if (!(isASCIISpace(*newPosStr) || *newPosStr == ',')) {
            if (*newPosStr != ':')
                return std::numeric_limits<double>::quiet_NaN();
            // There was no year; the number was the hour.
            year = std::nullopt;
        } else {
            // in the normal case (we parsed the year), advance to the next number
            dateString = ++newPosStr;
            skipSpacesAndComments(dateString);
        }

        parseLong(dateString, &newPosStr, 10, &hour);
        // Do not check for errno here since we want to continue
        // even if errno was set because we are still looking
        // for the timezone!

        // Read a number? If not, this might be a timezone name.
        if (newPosStr != dateString) {
            dateString = newPosStr;

            if (hour < 0 || hour > 23)
                return std::numeric_limits<double>::quiet_NaN();

            if (!*dateString)
                return std::numeric_limits<double>::quiet_NaN();

            // ':12:40 GMT'
            if (*dateString++ != ':')
                return std::numeric_limits<double>::quiet_NaN();

            if (!parseLong(dateString, &newPosStr, 10, &minute))
                return std::numeric_limits<double>::quiet_NaN();
            dateString = newPosStr;

            if (minute < 0 || minute > 59)
                return std::numeric_limits<double>::quiet_NaN();

            // ':40 GMT'
            if (*dateString && *dateString != ':' && !isASCIISpace(*dateString))
                return std::numeric_limits<double>::quiet_NaN();

            // seconds are optional in rfc822 + rfc2822
            if (*dateString ==':') {
                dateString++;

                if (!parseLong(dateString, &newPosStr, 10, &second))
                    return std::numeric_limits<double>::quiet_NaN();
                dateString = newPosStr;

                if (second < 0 || second > 59)
                    return std::numeric_limits<double>::quiet_NaN();
            }

            skipSpacesAndComments(dateString);

            // Optional 12-hour clock suffix: 12am becomes hour 0, 1pm-11pm gain 12 hours.
            if (startsWithLettersIgnoringASCIICase(dateString, "am")) {
                if (hour > 12)
                    return std::numeric_limits<double>::quiet_NaN();
                if (hour == 12)
                    hour = 0;
                dateString += 2;
                skipSpacesAndComments(dateString);
            } else if (startsWithLettersIgnoringASCIICase(dateString, "pm")) {
                if (hour > 12)
                    return std::numeric_limits<double>::quiet_NaN();
                if (hour != 12)
                    hour += 12;
                dateString += 2;
                skipSpacesAndComments(dateString);
            }
        }
    }
    
    // The year may be after the time but before the time zone.
    if (isASCIIDigit(*dateString) && !year) {
        int result = 0;
        if (!parseInt(dateString, &newPosStr, 10, &result))
            return std::numeric_limits<double>::quiet_NaN();
        year = result;
        dateString = newPosStr;
        skipSpacesAndComments(dateString);
    }

    // Don't fail if the time zone is missing. 
    // Some websites omit the time zone (4275206).
    if (*dateString) {
        if (startsWithLettersIgnoringASCIICase(dateString, "gmt") || startsWithLettersIgnoringASCIICase(dateString, "utc")) {
            dateString += 3;
            haveTZ = true;
        }

        if (*dateString == '+' || *dateString == '-') {
            int o;
            if (!parseInt(dateString, &newPosStr, 10, &o))
                return std::numeric_limits<double>::quiet_NaN();
            dateString = newPosStr;

            if (o < -9959 || o > 9959)
                return std::numeric_limits<double>::quiet_NaN();

            int sgn = (o < 0) ? -1 : 1;
            o = abs(o);
            if (*dateString != ':') {
                // Values of 24 and above are read as hhmm; smaller values as whole hours.
                if (o >= 24)
                    offset = ((o / 100) * 60 + (o % 100)) * sgn;
                else
                    offset = o * 60 * sgn;
            } else { // GMT+05:00
                ++dateString; // skip the ':'
                int o2;
                if (!parseInt(dateString, &newPosStr, 10, &o2))
                    return std::numeric_limits<double>::quiet_NaN();
                dateString = newPosStr;
                offset = (o * 60 + o2) * sgn;
            }
            haveTZ = true;
        } else {
            for (auto& knownZone : knownZones) {
                // Since the passed-in length is used for both strings, the following checks that
                // dateString has the time zone name as a prefix, not that it is equal.
                auto length = strlen(knownZone.tzName);
                if (equalLettersIgnoringASCIICase(dateString, knownZone.tzName, length)) {
                    offset = knownZone.tzOffset;
                    dateString += length;
                    haveTZ = true;
                    break;
                }
            }
        }
    }

    skipSpacesAndComments(dateString);

    // Last chance for a year: it may follow the time zone.
    if (*dateString && !year) {
        int result = 0;
        if (!parseInt(dateString, &newPosStr, 10, &result))
            return std::numeric_limits<double>::quiet_NaN();
        year = result;
        dateString = newPosStr;
        skipSpacesAndComments(dateString);
    }

    // Trailing garbage
    if (*dateString)
        return std::numeric_limits<double>::quiet_NaN();

    // Y2K: Handle 2 digit years.
    if (year) {
        int yearValue = year.value();
        if (yearValue >= 0 && yearValue < 100) {
            if (yearValue < 50)
                yearValue += 2000;
            else
                yearValue += 1900;
        }
        year = yearValue;
    } else {
        // We select 2000 as default value. This is because of the following reasons.
        // 1. Year 2000 was used for the initial value of the variable `year`. While it won't be posed to users in WebKit,
        //    V8 used this 2000 as its default value. (As of April 2017, V8 is using the year 2001 and Spider Monkey is
        //    not doing this kind of fallback.)
        // 2. It is a leap year. When using `new Date("Feb 29")`, we assume that people want to save month and day.
        //    Leap year can save user inputs if they are valid. If we use the current year instead, the current year
        //    may not be a leap year. In that case, `new Date("Feb 29").getMonth()` becomes 2 (March).
        year = 2000;
    }
    ASSERT(year);
    
    // month is 0-based internally; ymdhmsToSeconds() is given a 1-based month.
    return ymdhmsToSeconds(year.value(), month + 1, day, hour, minute, second) * msPerSecond;
}
// A media-type character is anything other than ASCII whitespace or '/'.
bool isMediaTypeCharacter(UChar c)
{
    if (isASCIISpace(c))
        return false;
    return c != '/';
}
// Origins are separated by ASCII whitespace or by a comma.
static bool isOriginSeparator(UChar ch)
{
    if (isASCIISpace(ch))
        return true;
    return ch == ',';
}
// Scans the next token from [m_ptr, m_end) into |token| and returns its type.
//
// Leading ASCII whitespace is skipped. At end of input the token is TokEnd
// with an empty range. Punctuation tokens and the keywords true/false/null are
// recognised here; strings and numbers are delegated to lexString<>() and
// lexNumber(). For anything else TokError is returned with token.type set to
// TokError and token.start at the offending character; m_ptr is not advanced.
//
// The returned value always matches token.type for the tokens produced here.
// Previously '(' and ')' stored TokLParen/TokRParen in the token but returned
// TokLBracket/TokRBracket.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    // Assume failure until a token is recognised.
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
        case '[':
            token.type = TokLBracket;
            token.end = ++m_ptr;
            return TokLBracket;
        case ']':
            token.type = TokRBracket;
            token.end = ++m_ptr;
            return TokRBracket;
        case '(':
            token.type = TokLParen;
            token.end = ++m_ptr;
            return TokLParen;
        case ')':
            token.type = TokRParen;
            token.end = ++m_ptr;
            return TokRParen;
        case '{':
            token.type = TokLBrace;
            token.end = ++m_ptr;
            return TokLBrace;
        case '}':
            token.type = TokRBrace;
            token.end = ++m_ptr;
            return TokRBrace;
        case ',':
            token.type = TokComma;
            token.end = ++m_ptr;
            return TokComma;
        case ':':
            token.type = TokColon;
            token.end = ++m_ptr;
            return TokColon;
        case '"':
            if (m_mode == StrictJSON)
                return lexString<StrictJSON>(token);
            return lexString<NonStrictJSON>(token);
        case 't':
            // The length check keeps the keyword comparison inside the buffer.
            if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                m_ptr += 4;
                token.type = TokTrue;
                token.end = m_ptr;
                return TokTrue;
            }
            break;
        case 'f':
            if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                m_ptr += 5;
                token.type = TokFalse;
                token.end = m_ptr;
                return TokFalse;
            }
            break;
        case 'n':
            if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                m_ptr += 4;
                token.type = TokNull;
                token.end = m_ptr;
                return TokNull;
            }
            break;
        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return lexNumber(token);
    }
    return TokError;
}
Beispiel #19
0
// Converts this string to a double.
//
// tolerateTrailingJunk: when false, anything other than ASCII whitespace after
//                       the number turns the result into NaN.
// tolerateEmptyString:  when true, an empty or all-whitespace string yields 0
//                       instead of NaN.
//
// Accepts leading/trailing ASCII whitespace, "0x"/"0X" hexadecimal integers,
// anything WTI::strtod() parses to a finite value, and an optionally signed,
// exactly-cased "Infinity". Returns NaN when the UTF-8 conversion yields a
// null CString or when the text is not a number.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    // Fast path for single-character strings: a digit is its own value, a lone
    // space counts as an empty string, and everything else is NaN.
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk 
    // after the number, even if it contains invalid UTF-16 sequences. So we
    // shouldn't use the UTF8String function, which returns null when it
    // encounters invalid UTF-16. Further, we have no need to convert the
    // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
    // unnecessary work.
    CString s = UTF8String();
    if (s.isNull())
        return NaN;
    const char* c = s.data();

    // skip leading white space
    while (isASCIISpace(*c))
        c++;

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

    double d;

    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
        // Step onto the 'x'; the pre-increment in the loop then lands on the
        // first hex digit.
        c++;
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                // Clearing bit 0x20 folds lowercase letters to uppercase.
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        // At or above this bound the running sum is recomputed from the digit
        // text by parseIntOverflow().
        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
        d = WTI::strtod(c, &end);
        // Accept strtod's result only if it consumed something (or produced a
        // non-zero value) and the value is finite.
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than Ti in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                // strtod returned an infinity for text that does not begin
                // with 'I'/'i' after the sign; keep that value.
                c = end;
            else
                return NaN;
        }
    }

    // allow trailing white space
    while (isASCIISpace(*c))
        c++;
    // don't allow anything after - unless tolerant=true
    // FIXME: If string contains a U+0000 character, then this check is incorrect.
    if (!tolerateTrailingJunk && *c != '\0')
        d = NaN;

    return d;
}
// Converts this string to a double.
//
// tolerateTrailingJunk: when false, anything other than ASCII whitespace after
//                       the number turns the result into NaN.
// tolerateEmptyString:  when true, an empty or all-whitespace string yields 0
//                       instead of NaN.
//
// Accepts leading/trailing ASCII whitespace, "0x"/"0X" hexadecimal integers,
// anything WTF::strtod() parses to a finite value, and an optionally signed,
// exactly-cased "Infinity". Returns NaN when getCString() fails or when the
// text is not a number.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    // Fast path for single-character strings: a digit is its own value, a lone
    // space counts as an empty string, and everything else is NaN.
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
    // after the number, so this is too strict a check.
    CStringBuffer s;
    if (!getCString(s))
        return NaN;
    const char* c = s.data();

    // skip leading white space
    while (isASCIISpace(*c))
        c++;

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

    double d;

    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
        // Step onto the 'x'; the pre-increment in the loop then lands on the
        // first hex digit.
        c++;
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                // Clearing bit 0x20 folds lowercase letters to uppercase.
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        // At or above this bound the running sum is recomputed from the digit
        // text by parseIntOverflow().
        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
        d = WTF::strtod(c, &end);
        // Accept strtod's result only if it consumed something (or produced a
        // non-zero value) and the value is finite.
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than JavaScript in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                // strtod returned an infinity for text that does not begin
                // with 'I'/'i' after the sign; keep that value.
                c = end;
            else
                return NaN;
        }
    }

    // allow trailing white space
    while (isASCIISpace(*c))
        c++;
    // don't allow anything after - unless tolerant=true
    if (!tolerateTrailingJunk && *c != '\0')
        d = NaN;

    return d;
}
Beispiel #21
0
// Parses a whitespace-separated list of "name=value" options and applies each
// one through setOption().
//
// An option may be prefixed with "--", and a value may be wrapped in double
// quotes; the quotes are stripped before the option is applied. Parsing is
// done destructively on a private copy of optionsStr, so the caller's string
// is never modified.
//
// Returns false if the string is malformed (an option without '=' or an
// unterminated quote), in which case no option is applied at all, or if any
// individual setOption() call fails. Returns true otherwise.
bool Options::setOptions(const char* optionsStr)
{
    Vector<char*> options;

    size_t length = strlen(optionsStr);
    char* optionsStrCopy = WTF::fastStrDup(optionsStr);
    char* end = optionsStrCopy + length;
    char* p = optionsStrCopy;

    while (p < end) {
        // Skip white space.
        while (p < end && isASCIISpace(*p))
            p++;
        if (p == end)
            break;

        char* optionStart = p;
        p = strchr(p, '=');
        if (!p) {
            dataLogF("'=' not found in option string: %s\n", optionStart);
            // No pointer into the copy has been handed to setOption() yet,
            // so it is safe to release it before bailing out.
            WTF::fastFree(optionsStrCopy);
            return false;
        }
        p++;

        char* valueBegin = p;
        bool hasStringValue = false;
        const int minStringLength = 2; // The min is an empty string i.e. 2 double quotes.
        if ((p + minStringLength < end) && (*p == '"')) {
            p = strchr(p + 1, '"');
            if (!p) {
                dataLogF("Missing trailing '\"' in option string: %s\n", optionStart);
                WTF::fastFree(optionsStrCopy);
                return false; // End of string not found.
            }
            hasStringValue = true;
        }

        // Find next white space. If there is none, p stops at end, which means
        // this is the last option in the string.
        while (p < end && !isASCIISpace(*p))
            p++;

        // If we have a well-formed string value, strip the quotes.
        if (hasStringValue) {
            char* valueEnd = p;
            ASSERT((*valueBegin == '"') && ((valueEnd - valueBegin) >= minStringLength) && (valueEnd[-1] == '"'));
            // Shift the value left over the opening quote and terminate it
            // where the closing quote ends up.
            memmove(valueBegin, valueBegin + 1, valueEnd - valueBegin - minStringLength);
            valueEnd[-minStringLength] = '\0';
        }

        // Strip leading -- if present.
        if ((p - optionStart > 2) && optionStart[0] == '-' && optionStart[1] == '-')
            optionStart += 2;

        // Terminate this option in place. When p == end this just rewrites the
        // copy's own NUL terminator.
        *p++ = '\0';
        options.append(optionStart);
    }

    bool success = true;
    for (auto& option : options) {
        bool optionSuccess = setOption(option);
        if (!optionSuccess) {
            dataLogF("Failed to set option : %s\n", option);
            success = false;
        }
    }

    // NOTE(review): optionsStrCopy is deliberately not freed on this path.
    // setOption() receives pointers into the copy and may retain them (e.g.
    // for string-valued options) - confirm against setOption() before adding
    // a WTF::fastFree() here.

    dumpOptionsIfNeeded();
    return success;
}
// Returns true for every character that is not ASCII whitespace.
static bool isSourceCharacter(UChar c)
{
    const bool isWhitespace = isASCIISpace(c);
    return !isWhitespace;
}
// Negated form of isASCIISpace(): true when c is anything other than ASCII
// whitespace.
bool isNotASCIISpace(UChar c)
{
    if (isASCIISpace(c))
        return false;
    return true;
}
Beispiel #24
0
// Parses a NUL-terminated, human-readable date string.
//
// dateString: the text to parse; see the list of accepted shapes below.
// haveTZ:     set to true when the string carried a time zone (GMT/UTC, a
//             numeric +hhmm / +hh:mm offset, or a name from known_zones),
//             false otherwise.
// offset:     set to the time zone offset that was found (hours * 60 + minutes
//             for numeric offsets, the table's tzOffset for named zones), or 0
//             when there was none.
//
// Returns ymdhmsToSeconds() for the parsed fields scaled by msPerSecond, or NaN
// when the string cannot be parsed. The returned value is NOT adjusted by
// offset; that is left to the caller.
static double parseDateFromNullTerminatedCharacters(const char* dateString, bool& haveTZ, int& offset)
{
    haveTZ = false;
    offset = 0;

    // This parses a date in the form:
    //     Tuesday, 09-Nov-99 23:12:40 GMT
    // or
    //     Sat, 01-Jan-2000 08:00:00 GMT
    // or
    //     Sat, 01 Jan 2000 08:00:00 GMT
    // or
    //     01 Jan 99 22:00 +0100    (exceptions in rfc822/rfc2822)
    // ### non RFC formats, added for Javascript:
    //     [Wednesday] January 09 1999 23:12:40 GMT
    //     [Wednesday] January 09 23:12:40 GMT 1999
    //
    // We ignore the weekday.

    // Skip leading space
    skipSpacesAndComments(dateString);

    long month = -1;
    const char* wordStart = dateString;
    // Check contents of first words if not number
    while (*dateString && !isASCIIDigit(*dateString)) {
        if (isASCIISpace(*dateString) || *dateString == '(') {
            // End of a word: only words of three or more characters are tried
            // as a month name.
            if (dateString - wordStart >= 3)
                month = findMonth(wordStart);
            skipSpacesAndComments(dateString);
            wordStart = dateString;
        } else
            dateString++;
    }

    // Missing delimiter between month and day (like "January29")?
    if (month == -1 && wordStart != dateString)
        month = findMonth(wordStart);

    skipSpacesAndComments(dateString);

    if (!*dateString)
        return NaN;

    // ' 09-Nov-99 23:12:40 GMT'
    char* newPosStr;
    long day;
    if (!parseLong(dateString, &newPosStr, 10, &day))
        return NaN;
    dateString = newPosStr;

    if (!*dateString)
        return NaN;

    if (day < 0)
        return NaN;

    long year = 0;
    if (day > 31) {
        // The first number is too large to be a day of the month, so treat it
        // as a year.
        // ### where is the boundary and what happens below?
        if (*dateString != '/')
            return NaN;
        // looks like a YYYY/MM/DD date
        if (!*++dateString)
            return NaN;
        year = day;
        if (!parseLong(dateString, &newPosStr, 10, &month))
            return NaN;
        month -= 1; // 0-based
        dateString = newPosStr;
        if (*dateString++ != '/' || !*dateString)
            return NaN;
        if (!parseLong(dateString, &newPosStr, 10, &day))
            return NaN;
        dateString = newPosStr;
    } else if (*dateString == '/' && month == -1) {
        dateString++;
        // This looks like a MM/DD/YYYY date, not an RFC date.
        month = day - 1; // 0-based
        if (!parseLong(dateString, &newPosStr, 10, &day))
            return NaN;
        if (day < 1 || day > 31)
            return NaN;
        dateString = newPosStr;
        if (*dateString == '/')
            dateString++;
        if (!*dateString)
            return NaN;
    } else {
        if (*dateString == '-')
            dateString++;

        skipSpacesAndComments(dateString);

        if (*dateString == ',')
            dateString++;

        if (month == -1) { // not found yet
            month = findMonth(dateString);
            if (month == -1)
                return NaN;

            // Step over the month name up to its delimiter.
            while (*dateString && *dateString != '-' && *dateString != ',' && !isASCIISpace(*dateString))
                dateString++;

            if (!*dateString)
                return NaN;

            // '-99 23:12:40 GMT'
            if (*dateString != '-' && *dateString != '/' && *dateString != ',' && !isASCIISpace(*dateString))
                return NaN;
            dateString++;
        }
    }

    if (month < 0 || month > 11)
        return NaN;

    // '99 23:12:40 GMT'
    if (year <= 0 && *dateString) {
        // dateString is intentionally not advanced here: the number may turn
        // out to be the hour rather than the year (see below).
        if (!parseLong(dateString, &newPosStr, 10, &year))
            return NaN;
    }

    // Don't fail if the time is missing.
    long hour = 0;
    long minute = 0;
    long second = 0;
    if (!*newPosStr)
        dateString = newPosStr;
    else {
        // ' 23:12:40 GMT'
        if (!(isASCIISpace(*newPosStr) || *newPosStr == ',')) {
            if (*newPosStr != ':')
                return NaN;
            // There was no year; the number was the hour.
            // year == -1 marks "look for the year after the time zone".
            year = -1;
        } else {
            // in the normal case (we parsed the year), advance to the next number
            dateString = ++newPosStr;
            skipSpacesAndComments(dateString);
        }

        parseLong(dateString, &newPosStr, 10, &hour);
        // Do not check for errno here since we want to continue
        // even if errno was set because we are still looking
        // for the timezone!

        // Read a number? If not, this might be a timezone name.
        if (newPosStr != dateString) {
            dateString = newPosStr;

            if (hour < 0 || hour > 23)
                return NaN;

            if (!*dateString)
                return NaN;

            // ':12:40 GMT'
            if (*dateString++ != ':')
                return NaN;

            if (!parseLong(dateString, &newPosStr, 10, &minute))
                return NaN;
            dateString = newPosStr;

            if (minute < 0 || minute > 59)
                return NaN;

            // ':40 GMT'
            if (*dateString && *dateString != ':' && !isASCIISpace(*dateString))
                return NaN;

            // seconds are optional in rfc822 + rfc2822
            if (*dateString == ':') {
                dateString++;

                if (!parseLong(dateString, &newPosStr, 10, &second))
                    return NaN;
                dateString = newPosStr;

                if (second < 0 || second > 59)
                    return NaN;
            }

            skipSpacesAndComments(dateString);

            // 12-hour clock: 12 AM maps to hour 0, 1-11 PM map to 13-23.
            if (strncasecmp(dateString, "AM", 2) == 0) {
                if (hour > 12)
                    return NaN;
                if (hour == 12)
                    hour = 0;
                dateString += 2;
                skipSpacesAndComments(dateString);
            } else if (strncasecmp(dateString, "PM", 2) == 0) {
                if (hour > 12)
                    return NaN;
                if (hour != 12)
                    hour += 12;
                dateString += 2;
                skipSpacesAndComments(dateString);
            }
        }
    }

    // Don't fail if the time zone is missing.
    // Some websites omit the time zone (4275206).
    if (*dateString) {
        if (strncasecmp(dateString, "GMT", 3) == 0 || strncasecmp(dateString, "UTC", 3) == 0) {
            dateString += 3;
            haveTZ = true;
        }

        if (*dateString == '+' || *dateString == '-') {
            long o;
            if (!parseLong(dateString, &newPosStr, 10, &o))
                return NaN;
            dateString = newPosStr;

            if (o < -9959 || o > 9959)
                return NaN;

            int sgn = (o < 0) ? -1 : 1;
            o = labs(o);
            if (*dateString != ':') {
                // +hhmm: split the number into hours and minutes.
                offset = ((o / 100) * 60 + (o % 100)) * sgn;
            } else { // GMT+05:00
                // Step over the ':' first; parseLong() has to start on the
                // minutes, exactly as the hh:mm:ss parsing above does.
                ++dateString;
                long o2;
                if (!parseLong(dateString, &newPosStr, 10, &o2))
                    return NaN;
                dateString = newPosStr;
                offset = (o * 60 + o2) * sgn;
            }
            haveTZ = true;
        } else {
            // Not a numeric offset: try the table of named zones.
            for (int i = 0; i < int(sizeof(known_zones) / sizeof(KnownZone)); i++) {
                if (0 == strncasecmp(dateString, known_zones[i].tzName, strlen(known_zones[i].tzName))) {
                    offset = known_zones[i].tzOffset;
                    dateString += strlen(known_zones[i].tzName);
                    haveTZ = true;
                    break;
                }
            }
        }
    }

    skipSpacesAndComments(dateString);

    // The year was deferred (the first number after the day was the hour), so
    // it may follow the time zone: '... 23:12:40 GMT 1999'.
    if (*dateString && year == -1) {
        if (!parseLong(dateString, &newPosStr, 10, &year))
            return NaN;
        dateString = newPosStr;
    }

    skipSpacesAndComments(dateString);

    // Trailing garbage
    if (*dateString)
        return NaN;

    // Y2K: Handle 2 digit years.
    if (year >= 0 && year < 100) {
        if (year < 50)
            year += 2000;
        else
            year += 1900;
    }

    return ymdhmsToSeconds(year, month + 1, day, hour, minute, second) * msPerSecond;
}
Beispiel #25
0
// Scans the next token starting at m_ptr, skipping leading ASCII whitespace.
//
// token: filled in with the token's type and its [start, end) range for the
//        single-character tokens and for end of input; string and number
//        tokens are delegated to lexString() / lexNumber().
//
// Returns the type of the token that was scanned. The return value always
// matches token.type for the single-character tokens handled here. Returns
// TokEnd at end of input and TokError for anything that cannot start a token.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    // Default to an error token; each recognised case below overwrites it.
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
        case '[':
            token.type = TokLBracket;
            token.end = ++m_ptr;
            return TokLBracket;
        case ']':
            token.type = TokRBracket;
            token.end = ++m_ptr;
            return TokRBracket;
        case '(':
            token.type = TokLParen;
            token.end = ++m_ptr;
            // Must agree with token.type: returning the bracket type here would
            // make callers that test the return value see '(' as '['.
            return TokLParen;
        case ')':
            token.type = TokRParen;
            token.end = ++m_ptr;
            // Same as above: ')' must not be reported as ']'.
            return TokRParen;
        case '{':
            token.type = TokLBrace;
            token.end = ++m_ptr;
            return TokLBrace;
        case '}':
            token.type = TokRBrace;
            token.end = ++m_ptr;
            return TokRBrace;
        case ',':
            token.type = TokComma;
            token.end = ++m_ptr;
            return TokComma;
        case ':':
            token.type = TokColon;
            token.end = ++m_ptr;
            return TokColon;
        case '"':
        case '\'':
            return lexString(token);

        // Numbers are trickier so we only allow the most basic form, basically
        // * [1-9][0-9]*(\.[0-9]*)?
        // * \.[0-9]*
        // * 0(\.[0-9]*)?
        case '0':
            // If a number starts with 0 it's expected to be octal.  It seems silly
            // to attempt to handle this case, so we abort
            if (m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1]))
                return TokError;
            return lexNumber(token);
        case '.':
            // If a number starts with a '.' it must be followed by a digit
            if (!(m_ptr < m_end - 1 && isASCIIDigit(m_ptr[1])))
                return TokError;
            return lexNumber(token);
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return lexNumber(token);
    }
    return TokError;
}
// Accepts the characters allowed in a directive value: ASCII whitespace plus
// the visible ASCII range (VCHAR, 0x21 through 0x7e).
bool isCSPDirectiveValueCharacter(UChar c)
{
    if (isASCIISpace(c))
        return true;

    const bool isVisibleASCII = c >= 0x21 && c <= 0x7e;
    return isVisibleASCII;
}