/**
 * Consumes a run of whitespace on the current line.
 *
 * The stream is advanced while the next character is whitespace and is not a
 * line feed, so a terminating line feed is left unread for the caller.
 *
 * @return a Token::Whitespace token built from the stream's anchor and length.
 */
Token Scanner::readWhiteSpace()
{
    for (QChar next = m_src.peek(); next.isSpace() && !isLineFeed(next); next = m_src.peek())
        m_src.move();

    return Token(Token::Whitespace, m_src.anchor(), m_src.length());
}
/**
 * Reads a single-line Ruby comment (one started with "#").
 *
 * The stream is advanced until the next character is a line feed or the null
 * character; neither of those is consumed.
 *
 * @return a Token::Comment token built from the stream's anchor and length.
 */
Token Scanner::readComment()
{
    for (QChar next = m_src.peek(); !(isLineFeed(next) || next.isNull()); next = m_src.peek())
        m_src.move();

    return Token(Token::Comment, m_src.anchor(), m_src.length());
}
//================================================================================== // returns true if entire file was parsed, false if it aborted for some reason //================================================================================== int32 InPlaceParser::Parse(InPlaceParserInterface *callback) { int32 ret = 0; ph_assert( callback ); if ( mData ) { int32 lineno = 0; char *foo = mData; char *begin = foo; while ( *foo ) { if ( isLineFeed(*foo) ) { ++lineno; *foo = 0; if ( *begin ) // if there is any data to parse at all... { bool snarfed = callback->preParseLine(lineno,begin); if ( !snarfed ) { int32 v = ProcessLine(lineno,begin,callback); if ( v ) ret = v; } } ++foo; if ( *foo == 10 ) ++foo; // skip line feed, if it is in the carraige-return line-feed format... begin = foo; } else { ++foo; } } lineno++; // lasst line. int32 v = ProcessLine(lineno,begin,callback); if ( v ) ret = v; } return ret; }
/**
 * Feeds one preprocessor token into the include-detection state machine.
 *
 * After a macro token whose content is "include", three forms are recognised
 * and reported through receiver_().include_detected():
 *  - a string token "..."                       -> IncludeType::Local
 *  - the symbol '<', identifiers and the symbols
 *    '.', '/', '-', '+', then '>'               -> IncludeType::System
 *  - an identifier, followed by the content of every token up to the next
 *    line feed or carriage return token         -> IncludeType::Macro
 *
 * Whitespace tokens are ignored, except while a macro include is being
 * collected, where the content of every token is appended as-is.
 *
 * '-' and '+' are accepted inside <...> so that names such as <c++/x> or
 * <foo-bar.h> are handled the same way as by the file-based process()
 * overload, instead of failing with UnexpectedSymbol.
 *
 * @param token the next token of the stream being scanned.
 * @return the MSS return code; the failing checks below yield IllegalInclude,
 *         IllegalIncludeSize, UnexpectedSymbol or UnexpectedToken.
 */
ReturnCode process(const pp::Token &token)
{
    MSS_BEGIN(ReturnCode);

    if (state_ == MacroInclude)
    {
        if (isLineFeed(token) || isCarriageReturn(token))
        {
            // End of line terminates the macro-style include.
            receiver_().include_detected(include_.str(), IncludeType::Macro);
            include_.str("");
            state_ = Idle;
        }
        else
            include_ << token.range.content();
    }
    else if (token.isWhitespace())
    {
        // Whitespace carries no information outside of a macro include.
    }
    else
    {
        switch (state_)
        {
            case Idle:
                if (isMacro(token) && token.range.content() == "include")
                    state_ = IncludeDetected;
                break;

            case IncludeDetected:
                if (isString(token))
                {
                    const auto content = token.range.content();
                    MSS(content.front() == '"' && content.back() == '"', IllegalInclude);
                    MSS(content.size() > 2, IllegalIncludeSize);
                    // Report the name without its surrounding quotes.
                    receiver_().include_detected(content.substr(1, content.size()-2), IncludeType::Local);
                    state_ = Idle;
                }
                else if (isSymbol(token))
                {
                    MSS(token.range.content() == "<");
                    include_.str("");
                    state_ = BeginBracketInclude;
                }
                else if (isIdentifier(token))
                {
                    include_.str("");
                    include_ << token.range.content();
                    state_ = MacroInclude;
                }
                else
                {
                    std::ostringstream oss;
                    oss << "I expected either a Symbol or a String, but I got a " << toString(token.type);
                    receiver_().include_error(oss.str());
                    MSS_L(UnexpectedToken);
                }
                break;

            case BeginBracketInclude:
                switch (token.type)
                {
                    case Token::Identifier:
                        include_ << token.range.content();
                        break;
                    case Token::Symbol:
                        {
                            const auto symbol = token.range.content();
                            assert(symbol.size() == 1);
                            switch (symbol[0])
                            {
                                case '.':
                                case '/':
                                case '-':
                                case '+':
                                    include_ << symbol;
                                    break;
                                case '>':
                                    receiver_().include_detected(include_.str(), IncludeType::System);
                                    include_.str("");
                                    state_ = Idle;
                                    break;
                                default:
                                    MSS_L(UnexpectedSymbol);
                                    break;
                            }
                        }
                        break;
                    default:
                        MSS_L(UnexpectedToken);
                        break;
                }
                break;
        }
    }

    MSS_END();
}
/**
 * Reads and tokenizes a file, then reports every include it finds.
 *
 * The token stream is walked with a small state machine. After a macro token
 * whose content is "include", three forms are recognised and reported through
 * receiver_().includes_detected():
 *  - a string token "..."                       -> IncludeType::Local
 *  - the symbol '<', identifiers and the symbols
 *    '.', '/', '-', '+', then '>'               -> IncludeType::System
 *  - an identifier, followed by the content of every token up to the next
 *    line feed or carriage return token (or the end of the token list)
 *                                               -> IncludeType::Macro
 *
 * @param file the file to read and scan.
 * @return the MSS return code; failure when reading or tokenizing fails, or
 *         IllegalInclude, IllegalIncludeSize, UnexpectedSymbol or
 *         UnexpectedToken from the checks below.
 */
ReturnCode process(const gubg::file::File &file)
{
    MSS_BEGIN(ReturnCode);

    Range range;
    MSS(gubg::file::read(range, file));

    pp::Lexer<std::vector<pp::Token>> lexer;
    MSS(lexer.tokenize(range));

    enum State {Idle, IncludeDetected, BeginBracketInclude, MacroInclude};
    State state = Idle;
    std::ostringstream incl;

    typedef pp::Token Token;
    for (auto &token : lexer.tokens())
    {
        if (state == MacroInclude)
        {
            // Everything up to the end of the line belongs to the macro include.
            const bool endOfLine = isLineFeed(token) || isCarriageReturn(token);
            if (!endOfLine)
            {
                incl << token.range.content();
            }
            else
            {
                receiver_().includes_detected(incl.str(), IncludeType::Macro);
                incl.str("");
                state = Idle;
            }
        }
        else if (!token.isWhitespace())
        {
            switch (state)
            {
                case Idle:
                    if (isMacro(token) && token.range.content() == "include")
                        state = IncludeDetected;
                    break;

                case IncludeDetected:
                    if (isString(token))
                    {
                        const auto quoted = token.range.content();
                        MSS(quoted.front() == '"' && quoted.back() == '"', IllegalInclude);
                        MSS(quoted.size() > 2, IllegalIncludeSize);
                        // Report the name without its surrounding quotes.
                        receiver_().includes_detected(quoted.substr(1, quoted.size()-2), IncludeType::Local);
                        state = Idle;
                    }
                    else if (isSymbol(token))
                    {
                        MSS(token.range.content() == "<");
                        incl.str("");
                        state = BeginBracketInclude;
                    }
                    else if (isIdentifier(token))
                    {
                        incl.str("");
                        incl << token.range.content();
                        state = MacroInclude;
                    }
                    else
                    {
                        std::ostringstream message;
                        message << "I expected either a Symbol or a String, but I got a " << toString(token.type);
                        receiver_().includes_error(message.str());
                        MSS_L(UnexpectedToken);
                    }
                    break;

                case BeginBracketInclude:
                    switch (token.type)
                    {
                        case Token::Identifier:
                            incl << token.range.content();
                            break;
                        case Token::Symbol:
                            {
                                const auto sym = token.range.content();
                                assert(sym.size() == 1);
                                const auto c = sym[0];
                                if (c == '>')
                                {
                                    // Closing bracket completes the system include.
                                    receiver_().includes_detected(incl.str(), IncludeType::System);
                                    incl.str("");
                                    state = Idle;
                                }
                                else if (c == '.' || c == '/' || c == '-' || c == '+')
                                {
                                    incl << sym;
                                }
                                else
                                {
                                    MSS_L(UnexpectedSymbol);
                                }
                            }
                            break;
                        default:
                            MSS_L(UnexpectedToken);
                            break;
                    }
                    break;
            }
        }
    }

    // A macro include on the last line has no end-of-line token to close it.
    if (state == MacroInclude)
    {
        receiver_().includes_detected(incl.str(), IncludeType::Macro);
        incl.str("");
        state = Idle;
    }

    MSS_END();
}
/**
 * Scans the body of a delimited literal up to its closing delimiter, the end
 * of the line, the null character, or the start of a "#{" interpolation.
 *
 * @param quoteChar the character that closes the literal. When
 *                  delimiterHasPair(quoteChar) is true, nested
 *                  translateDelimiter(quoteChar)/quoteChar pairs are counted
 *                  so that only the outermost closer ends the literal
 *                  (e.g. %r{{}}).
 * @param state     the scanner state to pass to saveState() when the literal
 *                  is left before its closing delimiter; also selects the
 *                  token kind via tokenKindFor() and enables regexp modifier
 *                  consumption when it is State_Regexp.
 * @return the token for the scanned part, or the result of
 *         readInStringToken() when the scan starts directly on "#{" with
 *         nothing consumed yet.
 */
Token Scanner::readStringLiteral(QChar quoteChar, Scanner::State state)
{
    QChar ch = m_src.peek();

    // Positioned on "#{": emit what has been consumed so far as its own token
    // first, otherwise hand over to the in-string token reader.
    if (ch == '#' && m_src.peek(1) == '{') {
        if (m_src.length()) {
            saveState(state, quoteChar);
            return Token(tokenKindFor(quoteChar, state), m_src.anchor(), m_src.length());
        }
        return readInStringToken();
    }

    // A leading line feed is stepped over and excluded from the token by
    // re-anchoring after it.
    if (isLineFeed(ch)) {
        m_src.move();
        ch = m_src.peek();
        m_src.setAnchor();
        m_line++;
    }

    const QChar startQuoteChar = translateDelimiter(quoteChar);
    const bool quoteCharHasPair = delimiterHasPair(quoteChar);
    int bracketCount = 0;

    for (;;) {
        ch = m_src.peek();

        // End of line or end of input: the literal is still open.
        if (isLineFeed(ch) || ch.isNull()) {
            saveState(state, quoteChar);
            break;
        }

        if (ch == quoteChar && bracketCount == 0)
            break;

        // handles %r{{}}
        if (quoteCharHasPair) {
            if (ch == startQuoteChar) {
                bracketCount++;
                m_src.move();
                continue;
            } else if (ch == quoteChar) {
                bracketCount--;
                m_src.move();
                continue;
            }
        }

        if (ch == '\\') {
            // Consume the backslash and the character it escapes.
            // NOTE(review): the escaped character is consumed before it is
            // checked, so a line feed or null after a backslash ends up inside
            // the token - confirm this is intended.
            m_src.move();
            ch = m_src.peek();
            m_src.move();
            if (isLineFeed(ch) || ch.isNull()) {
                saveState(state, quoteChar);
                break;
            }
        } else if (quoteChar != '\'' && ch == '#' && m_src.peek(1) == '{') {
            // Interpolation start; not recognised when the closer is a single quote.
            saveState(state, quoteChar);
            break;
        } else {
            // A line feed / null check is not needed here: ch is unchanged
            // since the check at the top of this iteration.
            m_src.move();
        }
    }

    // Only a real closing delimiter finishes the literal.
    if (ch == quoteChar) {
        m_src.move();
        if (state == State_Regexp)
            consumeRegexpModifiers();
        clearState();
    }

    return Token(tokenKindFor(quoteChar, state), m_src.anchor(), m_src.length());
}
/**
 * Reads one token starting at the current stream position.
 *
 * The first character is consumed, any line feeds are skipped (updating
 * m_line and m_lineStartOffset and re-anchoring the stream), and the first
 * non-line-feed character selects which reader or single-character token is
 * used. The kind of the produced token, followed by '_', is appended to
 * m_tokenSequence; that sequence is cleared whenever a line feed was skipped
 * and is what m_methodPattern is matched against.
 *
 * @return the token that was read.
 */
Token Scanner::onDefaultState()
{
    QChar first = m_src.peek();
    m_src.move();

    // Ignore new lines
    bool hasNewLine = false;
    while (isLineFeed(first)) {
        hasNewLine = true;
        m_line++;
        m_lineStartOffset = m_src.position();
        first = m_src.peek();
        m_src.setAnchor();
        m_src.move();
    }
    if (hasNewLine)
        m_tokenSequence.clear();

    Token token;

    if (first.isDigit()) {
        // NOTE(review): a later, identical isDigit() test that called
        // readNumber() could never be reached and has been removed; every
        // digit is handled here. Confirm readFloatNumber() is the intended
        // entry point for all numeric literals.
        token = readFloatNumber();
    } else if (first == '\'' || first == '\"' || first == '`') {
        token = readStringLiteral(first, State_String);
    } else if (m_methodPattern.match(m_tokenSequence).hasMatch()) {
        token = readMethodDefinition();
    } else if (first.isLetter() || first == '_' || first == '@'
               || first == '$' || (first == ':' && m_src.peek() != ':')) {
        token = readIdentifier();
    } else if (first == '#') {
        token = readComment();
    } else if (first == '/') {
        token = readRegexp();
    } else if (first.isSpace()) {
        token = readWhiteSpace();
    } else if (first == ',') {
        token = Token(Token::OperatorComma, m_src.anchor(), m_src.length());
    } else if (first == '.') {
        token = Token(Token::OperatorDot, m_src.anchor(), m_src.length());
    } else if (first == '=' && m_src.peek() != '=') {
        token = Token(Token::OperatorAssign, m_src.anchor(), m_src.length());
    } else if (first == ';') {
        token = Token(Token::OperatorSemiColon, m_src.anchor(), m_src.length());
    } else if (first == '%') {
        token = readPercentageNotation();
    } else if (first == '{') {
        token = Token(Token::OpenBraces, m_src.anchor(), m_src.length());
    } else if (first == '}') {
        token = Token(Token::CloseBraces, m_src.anchor(), m_src.length());
    } else if (first == '[') {
        token = Token(Token::OpenBrackets, m_src.anchor(), m_src.length());
    } else if (first == ']') {
        token = Token(Token::CloseBrackets, m_src.anchor(), m_src.length());
    // For historic reasons, ( and ) are the Operator token, this will
    // be changed soon.
    } else if (first == '(' || first == ')') {
        token = Token(Token::Operator, m_src.anchor(), m_src.length());
    } else {
        token = readOperator(first);
    }

    // Record the token kind for the next m_methodPattern match.
    m_tokenSequence += QString::number(token.kind);
    m_tokenSequence += '_';

    return token;
}