/**
  consumes a run of whitespace characters, stopping in front of a line feed
  or the first non-space character, and returns a Whitespace token built
  from the stream's current anchor and length
  */
Token Scanner::readWhiteSpace()
{
    // A line feed counts as "space" for QChar, but it is not swallowed here.
    for (QChar next = m_src.peek(); next.isSpace() && !isLineFeed(next); next = m_src.peek())
        m_src.move();

    return Token(Token::Whitespace, m_src.anchor(), m_src.length());
}
/**
  reads a single-line ruby comment (the kind started with "#"): advances up
  to, but not past, the next line feed or null character and returns a
  Comment token built from the stream's current anchor and length
  */
Token Scanner::readComment()
{
    while (true) {
        QChar next = m_src.peek();
        if (isLineFeed(next) || next.isNull())
            break;
        m_src.move();
    }

    return Token(Token::Comment, m_src.anchor(), m_src.length());
}
// Example #3
// 0
	//==================================================================================
	// Splits the in-place buffer (mData) into lines and feeds them to the callback.
	//
	// Every line terminator found in the buffer is overwritten with a NUL so the line
	// can be handed out as a C string without copying. For each non-empty terminated
	// line, callback->preParseLine() is consulted first; ProcessLine() is only invoked
	// when preParseLine() returns false.
	//
	// callback : receiver of the parsed lines; must not be null (asserted).
	// returns  : 0 when mData is null or every ProcessLine() call returned 0, otherwise
	//            the value of the last ProcessLine() call that returned non-zero.
	//==================================================================================
	int32  InPlaceParser::Parse(InPlaceParserInterface *callback)
	{
		int32 ret = 0;
		ph_assert( callback );
		if ( mData )
		{
			int32 lineno = 0;

			char *foo   = mData;
			char *begin = foo;

			while ( *foo )
			{
				if ( isLineFeed(*foo) )
				{
					// Remember which terminator ended the line before it is overwritten.
					const bool endedWithCarriageReturn = ( *foo == 13 );

					++lineno;
					*foo = 0; // terminate the current line in place
					if ( *begin ) // if there is any data to parse at all...
					{
						bool snarfed = callback->preParseLine(lineno,begin);
						if ( !snarfed )
						{
							int32 v = ProcessLine(lineno,begin,callback);
							if ( v )
								ret = v;
						}
					}

					++foo;
					// Only a carriage-return + line-feed pair forms a single line break.
					// Skipping a line feed after *any* terminator would merge "\n\n"
					// (a blank line) into one break and make later line numbers too low.
					if ( endedWithCarriageReturn && *foo == 10 )
						++foo;
					begin = foo;
				}
				else
				{
					++foo;
				}
			}

			lineno++; // last line (the text after the final terminator, possibly empty).

			// NOTE(review): unlike the lines handled in the loop, the trailing line is
			// passed to ProcessLine() even when it is empty and without consulting
			// preParseLine() first - confirm whether that asymmetry is intended.
			int32 v = ProcessLine(lineno,begin,callback);
			if ( v )
				ret = v;
		}
		return ret;
	}
    /**
     * Advances the include-detection state machine by one preprocessor token.
     *
     * States (kept in state_ between calls):
     *  - Idle:                waits for a macro token whose content is "include".
     *  - IncludeDetected:     a string token reports a Local include, a "<" symbol
     *                         starts a bracket include, an identifier starts a
     *                         macro include; any other token is an error.
     *  - BeginBracketInclude: identifiers and the symbols . / - + are appended to
     *                         the pending name until ">" reports a System include.
     *  - MacroInclude:        every token is appended until a line feed or carriage
     *                         return token reports a Macro include.
     *
     * Results are delivered through receiver_().include_detected() and
     * receiver_().include_error().
     *
     * @param token the next token of the stream being scanned
     * @return the code produced by the MSS macros; the failure paths visible here
     *         use IllegalInclude, IllegalIncludeSize, UnexpectedToken and
     *         UnexpectedSymbol
     */
    ReturnCode process(const pp::Token &token)
    {
        MSS_BEGIN(ReturnCode);

        if (state_ == MacroInclude)
        {
            // Checked before the whitespace filter below, so every token up to the
            // end of the line (whitespace included) becomes part of the macro text.
            if (isLineFeed(token) || isCarriageReturn(token))
            {
                receiver_().include_detected(include_.str(), IncludeType::Macro);
                include_.str("");
                state_ = Idle;
            }
            else
                include_ << token.range.content();
        }
        else if (token.isWhitespace())
        {
            // Whitespace carries no information in the remaining states.
        }
        else
        {
            switch (state_)
            {
            case Idle:
                if (isMacro(token) && token.range.content() == "include")
                    state_ = IncludeDetected;
                break;
            case IncludeDetected:
                if (isString(token))
                {
                    const auto content = token.range.content();
                    // Guard against an empty token before looking at its first/last character.
                    MSS(!content.empty() && content.front() == '"' && content.back() == '"', IllegalInclude);
                    MSS(content.size() > 2, IllegalIncludeSize);
                    // Strip the surrounding quotes.
                    receiver_().include_detected(content.substr(1, content.size()-2), IncludeType::Local);
                    state_ = Idle;
                }
                else if (isSymbol(token))
                {
                    MSS(token.range.content() == "<");
                    include_.str("");
                    state_ = BeginBracketInclude;
                }
                else if (isIdentifier(token))
                {
                    include_.str("");
                    include_ << token.range.content();
                    state_ = MacroInclude;
                }
                else
                {
                    std::ostringstream oss;
                    oss << "I expected either a Symbol or a String, but I got a " << toString(token.type);
                    receiver_().include_error(oss.str());
                    MSS_L(UnexpectedToken);
                }
                break;
            case BeginBracketInclude:
                switch (token.type)
                {
                case Token::Identifier:
                    include_ << token.range.content();
                    break;
                case Token::Symbol:
                {
                    const auto symbol = token.range.content();
                    assert(symbol.size() == 1);
                    switch (symbol[0])
                    {
                    case '.':
                    case '/':
                    // '-' and '+' are accepted as well, matching the file-based
                    // process() overload (e.g. for paths such as <c++/...>).
                    case '-':
                    case '+':
                        include_ << symbol;
                        break;
                    case '>':
                        receiver_().include_detected(include_.str(), IncludeType::System);
                        include_.str("");
                        state_ = Idle;
                        break;
                    default:
                        MSS_L(UnexpectedSymbol);
                        break;
                    }
                }
                break;
                default:
                    MSS_L(UnexpectedToken);
                    break;
                }
                break;
            }
        }
        MSS_END();
    }
// Example #5
// 0
                        /**
                         * Reads and tokenizes the given file, then walks the tokens with a small
                         * state machine that reports include directives to receiver_().
                         *
                         * After a macro token whose content is "include":
                         *  - a string token is reported as a Local include (quotes stripped),
                         *  - "<" ... ">" is reported as a System include,
                         *  - an identifier starts a Macro include that runs until a line feed or
                         *    carriage return token, or until the tokens run out.
                         *
                         * @param file the file to scan
                         * @return the code produced by the MSS macros; the failure paths visible
                         *         here use IllegalInclude, IllegalIncludeSize, UnexpectedToken and
                         *         UnexpectedSymbol, plus whatever reading or tokenizing reports
                         */
                        ReturnCode process(const gubg::file::File &file)
                        {
                            MSS_BEGIN(ReturnCode);

                            // Load and tokenize the whole file up front.
                            Range contents;
                            MSS(gubg::file::read(contents, file));
                            pp::Lexer<std::vector<pp::Token>> lexer;
                            MSS(lexer.tokenize(contents));

                            enum State {Idle, IncludeDetected, BeginBracketInclude, MacroInclude};
                            State current = Idle;
                            std::ostringstream pending; // include name collected so far

                            for (auto &token : lexer.tokens())
                            {
                                if (current == MacroInclude)
                                {
                                    // Tested ahead of the whitespace filter: every token up to
                                    // the end of the line is appended to the macro text.
                                    if (isLineFeed(token) || isCarriageReturn(token))
                                    {
                                        receiver_().includes_detected(pending.str(), IncludeType::Macro);
                                        pending.str("");
                                        current = Idle;
                                    }
                                    else
                                    {
                                        pending << token.range.content();
                                    }
                                }
                                else if (!token.isWhitespace())
                                {
                                    switch (current)
                                    {
                                        case Idle:
                                            if (isMacro(token) && token.range.content() == "include")
                                                current = IncludeDetected;
                                            break;

                                        case IncludeDetected:
                                            if (isString(token))
                                            {
                                                const auto quoted = token.range.content();
                                                MSS(quoted.front() == '"' && quoted.back() == '"', IllegalInclude);
                                                MSS(quoted.size() > 2, IllegalIncludeSize);
                                                // Drop the surrounding quotes.
                                                receiver_().includes_detected(quoted.substr(1, quoted.size()-2), IncludeType::Local);
                                                current = Idle;
                                            }
                                            else if (isSymbol(token))
                                            {
                                                MSS(token.range.content() == "<");
                                                pending.str("");
                                                current = BeginBracketInclude;
                                            }
                                            else if (isIdentifier(token))
                                            {
                                                pending.str("");
                                                pending << token.range.content();
                                                current = MacroInclude;
                                            }
                                            else
                                            {
                                                std::ostringstream message;
                                                message << "I expected either a Symbol or a String, but I got a " << toString(token.type);
                                                receiver_().includes_error(message.str());
                                                MSS_L(UnexpectedToken);
                                            }
                                            break;

                                        case BeginBracketInclude:
                                            switch (token.type)
                                            {
                                                case pp::Token::Identifier:
                                                    pending << token.range.content();
                                                    break;
                                                case pp::Token::Symbol:
                                                    {
                                                        const auto sym = token.range.content();
                                                        assert(sym.size() == 1);
                                                        switch (sym[0])
                                                        {
                                                            // Symbols allowed inside the bracketed name.
                                                            case '.':
                                                            case '/':
                                                            case '-':
                                                            case '+':
                                                                pending << sym;
                                                                break;
                                                            // Closing bracket: the name is complete.
                                                            case '>':
                                                                receiver_().includes_detected(pending.str(), IncludeType::System);
                                                                pending.str("");
                                                                current = Idle;
                                                                break;
                                                            default:
                                                                MSS_L(UnexpectedSymbol);
                                                                break;
                                                        }
                                                    }
                                                    break;
                                                default:
                                                    MSS_L(UnexpectedToken);
                                                    break;
                                            }
                                            break;
                                    }
                                }
                            }

                            // A macro include on the very last line has no terminating
                            // line-break token; report it now.
                            if (current == MacroInclude)
                            {
                                receiver_().includes_detected(pending.str(), IncludeType::Macro);
                                pending.str("");
                                current = Idle;
                            }
                            MSS_END();
                        }
/**
  reads (a chunk of) a literal terminated by quoteChar and returns it as a
  token whose kind is chosen by tokenKindFor(quoteChar, state)

  The literal may be returned in several pieces: whenever scanning stops
  before the closing delimiter (line feed, end of input, or the start of a
  "#{" interpolation) the state is stored with saveState() so a later call
  can resume; once the closing delimiter is consumed the state is cleared.

  quoteChar is the closing delimiter; for paired delimiters the matching
  opening one is obtained from translateDelimiter() and nesting is tracked.
  state is the scanner state to save/restore; State_Regexp additionally
  makes the trailing regexp modifiers part of the token.
  */
Token Scanner::readStringLiteral(QChar quoteChar, Scanner::State state)
{
    QChar ch = m_src.peek();

    // Positioned right at an interpolation: first hand back whatever part of
    // the literal was already collected, otherwise read the in-string token.
    if (ch == '#' && m_src.peek(1) == '{') {
        if (m_src.length()) {
            saveState(state, quoteChar);
            return Token(tokenKindFor(quoteChar, state), m_src.anchor(), m_src.length());
        }
        return readInStringToken();
    }

    // Resuming on a new line: step over the line feed and start the token
    // after it.
    if (isLineFeed(ch)) {
        m_src.move();
        ch = m_src.peek();
        m_src.setAnchor();
        m_line++;
    }

    QChar startQuoteChar = translateDelimiter(quoteChar);
    bool quoteCharHasPair = delimiterHasPair(quoteChar);
    int bracketCount = 0;

    forever {
        ch = m_src.peek();
        // The literal continues on the next line (or the input ended): stop
        // in front of the terminator and remember where we are.
        if (isLineFeed(ch) || ch.isNull()) {
            saveState(state, quoteChar);
            break;
        }

        // Closing delimiter outside of any nested pair ends the literal.
        if (ch == quoteChar && bracketCount == 0)
            break;

        // handles %r{{}}
        if (quoteCharHasPair) {
            if (ch == startQuoteChar) {
                bracketCount++;
                m_src.move();
                continue;
            } else if (ch == quoteChar) {
                bracketCount--;
                m_src.move();
                continue;
            }
        }

        if (ch == '\\') {
            // Skip the backslash, then the escaped character.
            m_src.move();
            ch = m_src.peek();
            if (isLineFeed(ch) || ch.isNull()) {
                // Do not step over the terminator: end of input must not be
                // passed, and a line feed is left for the resuming call, which
                // consumes it and counts the line (same as the unescaped case).
                saveState(state, quoteChar);
                break;
            }
            m_src.move();
        } else if (quoteChar != '\'' && ch == '#' && m_src.peek(1) == '{') {
            // Interpolation start (not recognised in single-quoted literals):
            // return the text before it, the next call handles the "#{".
            saveState(state, quoteChar);
            break;
        } else {
            m_src.move();
        }
    }

    // Only true when the loop stopped on the closing delimiter.
    if (ch == quoteChar) {
        m_src.move();
        if (state == State_Regexp)
            consumeRegexpModifiers();
        clearState();
    }

    return Token(tokenKindFor(quoteChar, state), m_src.anchor(), m_src.length());
}
/**
  reads the next token while the scanner is in its default state

  Leading line feeds are skipped (updating m_line and m_lineStartOffset and
  resetting the recorded token sequence). The first character of the token
  is consumed up front and decides which reader or operator token is used.
  The kind of the returned token is appended to m_tokenSequence, which is
  what m_methodPattern is matched against.
  */
Token Scanner::onDefaultState()
{
    QChar first = m_src.peek();
    m_src.move();

    // Ignore new lines
    bool hasNewLine = false;
    while (isLineFeed(first)) {
        hasNewLine = true;
        m_line++;
        m_lineStartOffset = m_src.position();
        first = m_src.peek();
        m_src.setAnchor();
        m_src.move();
    }
    // The token sequence only describes the current line.
    if (hasNewLine)
        m_tokenSequence.clear();

    Token token;

    // The order of the tests matters: digits and quotes win over the method
    // definition pattern, which in turn wins over plain identifiers.
    if (first.isDigit()) {
        token = readFloatNumber();
    } else if (first == '\'' || first == '\"' || first == '`') {
        token = readStringLiteral(first, State_String);
    } else if (m_methodPattern.match(m_tokenSequence).hasMatch()) {
        token = readMethodDefinition();
    } else if (first.isLetter() || first == '_' || first == '@'
               || first == '$' || (first == ':' && m_src.peek() != ':')) {
        token = readIdentifier();
    } else if (first == '#') {
        token = readComment();
    } else if (first == '/') {
        token = readRegexp();
    } else if (first.isSpace()) {
        token = readWhiteSpace();
    } else if (first == ',') {
        token = Token(Token::OperatorComma, m_src.anchor(), m_src.length());
    } else if (first == '.') {
        token = Token(Token::OperatorDot, m_src.anchor(), m_src.length());
    } else if (first == '=' && m_src.peek() != '=') {
        token = Token(Token::OperatorAssign, m_src.anchor(), m_src.length());
    } else if (first == ';') {
        token = Token(Token::OperatorSemiColon, m_src.anchor(), m_src.length());
    } else if (first == '%') {
        token = readPercentageNotation();
    } else if (first == '{') {
        token = Token(Token::OpenBraces, m_src.anchor(), m_src.length());
    } else if (first == '}') {
        token = Token(Token::CloseBraces, m_src.anchor(), m_src.length());
    } else if (first == '[') {
        token = Token(Token::OpenBrackets, m_src.anchor(), m_src.length());
    } else if (first == ']') {
        token = Token(Token::CloseBrackets, m_src.anchor(), m_src.length());
        // For historic reasons, ( and ) are the Operator token, this will
        // be changed soon.
    } else if (first == '(' || first == ')') {
        token = Token(Token::Operator, m_src.anchor(), m_src.length());
    } else {
        token = readOperator(first);
    }

    // Record the kind of this token for the method-definition pattern.
    m_tokenSequence += QString::number(token.kind);
    m_tokenSequence += '_';

    return token;
}