void Scanner::handleLineComment() { loc_ = getTokenLocation(); if (currentChar_ == '(' && peekChar() == '*') { // eat * getNextChar(); // update currentChar_ getNextChar(); while (!(currentChar_ == '*' && peekChar() == ')')) { // skip comment content getNextChar(); // accident EOF if (input_.eof()) { errorToken(getTokenLocation().toString() + "end of file happended in comment, *) is expected!, but find " + currentChar_); errorFlag_ = true; break; } } if (!input_.eof()) { // eat * getNextChar(); // eat ) and update currentChar_ getNextChar(); } } }
/**
 * Reads a "key=value" pair into m_key and m_value.
 *
 * The key starts with the current character and runs up to (not including)
 * the next '='. The value starts with the character after '=' and runs up to
 * (not including) the next '\n'. Both delimiters are consumed.
 *
 * @param token  not used by this routine.
 * @return always PT_KEYVALUE.
 */
KAbstractHdrParserPrivate::token_id KAbstractHdrParserPrivate::lexTokenKeyValue(KAbstractLexer::token_type &token)
{
  // Key: current char plus everything before '='.
  m_key = currChar();
  while (peekChar() != '=')
  {
    m_key += nextChar();
  }
  nextChar(); // consume '='

  // Value: first char after '=' plus everything before the newline.
  m_value = nextChar();
  while (peekChar() != '\n')
  {
    m_value += nextChar();
  }
  nextChar(); // consume '\n'

  return PT_KEYVALUE;
}
long readHex() { if ( err ) return 0; skipWhitespace(); // hex must start with alphanumeric character char ch; if ( !peekChar(&ch) || !isalnum(ch) ) { err = TextFile::ERROR_PARSE; return 0; } long x = 0; while ( peekChar(&ch) ) { switch ( ch ) { case '0': x <<= 4; x += 0; break; case '1': x <<= 4; x += 1; break; case '2': x <<= 4; x += 2; break; case '3': x <<= 4; x += 3; break; case '4': x <<= 4; x += 4; break; case '5': x <<= 4; x += 5; break; case '6': x <<= 4; x += 6; break; case '7': x <<= 4; x += 7; break; case '8': x <<= 4; x += 8; break; case '9': x <<= 4; x += 9; break; case 'a': case 'A': x <<= 4; x += 0xA; break; case 'b': case 'B': x <<= 4; x += 0xB; break; case 'c': case 'C': x <<= 4; x += 0xC; break; case 'd': case 'D': x <<= 4; x += 0xD; break; case 'e': case 'E': x <<= 4; x += 0xE; break; case 'f': case 'F': x <<= 4; x += 0xF; break; default: return x; } readChar( &ch ); } return x; }
void Scanner::handleOperationState() { loc_ = getTokenLocation(); bool matched = false; // add current symbol char addToBuffer(currentChar_); // add next one symbol char addToBuffer(peekChar()); if (dictionary_.haveToken(buffer_)) { matched = true; getNextChar(); } else { reduceBuffer(); } auto tokenMeta = dictionary_.lookup(buffer_); // token type, token value, name, symbol precedence makeToken(std::get<0>(tokenMeta), std::get<1>(tokenMeta), loc_, buffer_, std::get<2>(tokenMeta)); // update currentChar_ getNextChar(); }
int KAbstractObjParserPrivate::lexReadInteger(int *sign, int *power) { *sign = 1; int pow = 10; int integer = 0; // Check for negation if (currChar() == '-') *sign = -1; else if (currChar() == '+') ; // Do nothing, sign is already 1 else integer = Karma::ctoi(currChar()); // Read the integer value while (Karma::isNumeric(peekChar())) { pow *= 10; integer *= 10; integer += Karma::ctoi(nextChar()); } (*power) = pow; return integer; }
// Parse one simple selector, e.g.: `type#id.class1.class2.class3` SimpleSelector CSSParser::parseSimpleSelector(){ QString tagName; QString id; QVector<QString> classes; if(!eof()){ QChar c = peekChar(); if(c == '#'){ consumeChar('#'); id = parseIdentifier(); qDebug() << "#"; } else if(c == '.') { consumeChar('.'); classes.append(parseIdentifier()); qDebug() << "."; } else if(c == '*'){ consumeChar('*');//TODO qDebug() << "*"; } else if(isVaidIdentifier(c)){ //h1 div.note tagName = parseIdentifier(); } } SimpleSelector ret{tagName, id, classes}; #ifdef CSS_DEBUG qDebug() << Q_FUNC_INFO; #endif /* CSS_DEBUG */ return ret; }
/*!
  Get one UCS-2 character from the stream.
  \param ucs Receives the character that was read
  \return false if no raw character could be read, true otherwise

  A CR that is immediately followed by LF is collapsed: the LF is read
  as well and is the character left in \a ucs.
 */
bool ImportStream::getChar(UT_UCSChar &ucs)
{
	const bool gotChar = getRawChar(ucs);
	if (gotChar)
	{
		const bool isCrLf = (ucs == UCS_CR) && (peekChar() == UCS_LF);
		if (isCrLf)
			getRawChar(ucs);
	}
	return gotChar;
}
/**
 * Consumes one character.
 *
 * Returns false straight away if the error state is set. Otherwise the
 * character is obtained through peekChar() and the peeked flag is cleared so
 * the next peek moves on. The line counter is bumped when a '\n' was read.
 *
 * @param ch  receives the character.
 * @return whatever peekChar() reported (true if a character was available).
 */
bool readChar( char* ch )
{
    if ( err )
        return false;

    if ( !peekChar(ch) )
    {
        m_peeked = false;
        return false;
    }

    m_peeked = false;
    if ( '\n' == *ch )
        ++line;
    return true;
}
inline bool ConsumerParser::readChar(char c) { if (_skipBlank) skipBlanks(); if (peekChar(c)) // He'll call addData() { _rwHeader++; return (true); } return (false); }
// Consumes the next character only if it equals c.
// Returns true when the character matched and was consumed; returns false,
// leaving curr where it was, when no input is left or the next character
// differs.
bool getChar(char c, CurrPtr& curr, EndPtr end)
{
    if (more(curr, end))
    {
        if (peekChar(curr, end) != c)
        {
            return false;
        }
        getChar(curr, end); // advance past the matched character
        return true;
    }
    return false;
}
// ipv6HexDigit4 *(':' ipv6HexDigit4) ['::' [ipv6HexSeq]] // where the first component, ipv6HexDigit4 is already parsed FlowToken FlowLexer::continueParseIPv6(bool firstComplete) { bool rv = true; if (firstComplete) { while (currentChar() == ':' && peekChar() != ':') { stringValue_ += ':'; nextChar(); if (!ipv6HexDigit4()) return false; } if (currentChar() == ':' && peekChar() == ':') { stringValue_ += "::"; nextChar(); nextChar(); rv = isHexChar() ? ipv6HexSeq() : true; } } else { ipv6HexDigits_ = stringValue_.size(); rv = ipv6HexPart(); } // parse embedded IPv4 remainer while (currentChar_ == '.' && std::isdigit(peekChar())) { stringValue_ += '.'; nextChar(); while (std::isdigit(currentChar_)) { stringValue_ += static_cast<char>(currentChar_); nextChar(); } } if (rv && ipValue_.set(stringValue_.c_str(), IPAddress::V6)) return token_ = FlowToken::IP; else return token_ = FlowToken::Unknown; }
// IPv6_HexPart ::= IPv6_HexSeq # (1) // | IPv6_HexSeq "::" [IPv6_HexSeq] # (2) // | "::" [IPv6_HexSeq] # (3) // bool FlowLexer::ipv6HexPart() { bool rv; if (currentChar() == ':' && peekChar() == ':') { // (3) stringValue_ = "::"; nextChar(); // skip ':' nextChar(); // skip ':' rv = isHexChar() ? ipv6HexSeq() : true; } else if (!!(rv = ipv6HexSeq())) { if (currentChar() == ':' && peekChar() == ':') { // (2) stringValue_ += "::"; nextChar(); // skip ':' nextChar(); // skip ':' rv = isHexChar() ? ipv6HexSeq() : true; } } if (std::isalnum(currentChar_) || currentChar_ == ':') rv = false; return rv; }
/**
 * Consumes characters while the next one is whitespace.
 *
 * @return false if the error state is set; otherwise true unless eof()
 *         reports the end of the input after skipping.
 */
bool skipWhitespace()
{
    if ( err )
        return false;

    char ch;
    // Cast before isspace(): a negative char value is undefined behaviour.
    while ( peekChar(&ch) && isspace(static_cast<unsigned char>(ch)) )
        readChar( &ch );

    return !eof();
}
// 1*4HEXDIGIT *(':' 1*4HEXDIGIT) bool FlowLexer::ipv6HexSeq() { if (!ipv6HexDigit4()) return false; while (currentChar() == ':' && peekChar() != ':') { stringValue_ += ':'; nextChar(); if (!ipv6HexDigit4()) return false; } return true; }
/**
 * Lexes a number that starts as an integer.
 *
 * If the digits are followed by '.', lexing continues in lexTokenFloat()
 * with the sign and integer part read so far. Otherwise the signed integer
 * is stored in token.m_attribute.asInteger.
 *
 * @return PT_INTEGER, or whatever lexTokenFloat() returns.
 */
KAbstractObjParserPrivate::token_id KAbstractObjParserPrivate::lexTokenInteger(token_type &token)
{
  int sign;
  const int magnitude = lexReadInteger(&sign);

  if (peekChar() != '.')
  {
    // Plain integer: set token attributes.
    token.m_attribute.asInteger = sign * magnitude;
    return PT_INTEGER;
  }

  // Eat the decimal point and step onto the character after it.
  nextChar();
  nextChar();
  return lexTokenFloat(token, sign, magnitude);
}
/**
 * Lexes an identifier into token.m_lexicon.
 *
 * The lexeme is the current character followed by every subsequent
 * character for which Karma::isAlpha() holds. The finished lexeme is passed
 * to symResolve() with PT_STRING, and its result is returned.
 */
KAbstractObjParserPrivate::token_id KAbstractObjParserPrivate::lexTokenIdentifier(token_type &token)
{
  token.m_lexicon = currChar();

  while (Karma::isAlpha(peekChar()))
  {
    token.m_lexicon += nextChar();
  }

  // Read complete: resolve the symbol.
  return symResolve(token, PT_STRING);
}
//auto, #000000, 78px QSharedPointer<Value> CSSParser::parseValue(){ #ifdef CSS_DEBUG qDebug() << Q_FUNC_INFO; #endif /* CSS_DEBUG */ QChar c = peekChar(); if (c.isDigit()) { return parseLength(); } else if (c == '#') { return parseColor(); } else { // QSharedPointer<KeywordValue> ret = QSharedPointer<KeywordValue>(new KeywordValue(parseIdentifier())); return QSharedPointer<KeywordValue>(new KeywordValue(parseIdentifier())); } qDebug() << Q_FUNC_INFO << "enter a dead end"; }
//{ margin: auto; color: #cc0000; } QVector<Declaration> CSSParser::parseDeclarations(){ consumeChar('{'); QVector<Declaration> declarations; // while(peekChar() != '}'){ // declarations.append(parseDeclaration()); // } while (true) { consumeWhitespaceOrNewline(); if (peekChar() == '}') { consumeChar(); break; } declarations.append(parseDeclaration()); } return declarations; }
/**
 * Lexes the fractional part of a float; sign and integer part are given.
 *
 * The fraction digits are read with lexReadInteger() and divided by the
 * power of ten it reports. If an 'e'/'E' follows, lexing continues in
 * lexTokenFloatExponent(); otherwise the value is stored in
 * token.m_attribute.asFloat.
 *
 * @return PT_FLOAT, or whatever lexTokenFloatExponent() returns.
 */
KAbstractObjParserPrivate::token_id KAbstractObjParserPrivate::lexTokenFloat(token_type &token, int sign, int integer)
{
  int fractionSign; // written by lexReadInteger(), not used afterwards
  int scale;
  const int fractionDigits = lexReadInteger(&fractionSign, &scale);

  const float decimal = double(fractionDigits) / scale;
  const float value = sign * (integer + decimal);

  if (Karma::toLower(peekChar()) != 'e')
  {
    // Plain float: set token attributes.
    token.m_attribute.asFloat = value;
    return PT_FLOAT;
  }

  // Eat the exponent marker and step onto the character after it.
  nextChar();
  nextChar();
  return lexTokenFloatExponent(token, value);
}
/*
 * Lexes an integer literal into `current`.
 *
 * Sets current.kind to tINT, then consumes decimal digits (folding each one
 * into current.value) and '_' separators (ignored) until any other character
 * is next. current.value is not reset here; digits are added onto whatever
 * value it already holds.
 */
static void peekInt(void)
{
    current.kind = tINT;

    for (;;) {
        const int next = peekChar();

        if (next >= '0' && next <= '9') {
            consumeChar();
            current.value = current.value * 10 + (next - '0');
        } else if (next == '_') {
            consumeChar(); /* digit separator, contributes nothing */
        } else {
            return;
        }
    }
}
void Scanner::handleStringState() { loc_ = getTokenLocation(); // eat ' and NOT update currentChar_ // because we don't want ' (single quote). getNextChar(); while (true) { if (currentChar_ == '\'') { // '''' condition // see pascal standard section 6.1.7 if (peekChar() == '\'') { getNextChar(); } // otherwise, we have handle string literal completely. else { break; } } addToBuffer(currentChar_); getNextChar(); } // eat end ' and update currentChar_ . getNextChar(); // just one char if (buffer_.length() == 1) { makeToken(TokenType::CHAR, TokenValue::UNRESERVED, loc_, static_cast<long>(buffer_.at(0)), buffer_); } else { makeToken(TokenType::STRING_LITERAL, TokenValue::UNRESERVED, loc_, buffer_, -1); } }
// Parses a comma-separated selector list up to (not including) the '{' that
// opens the declaration block.
//
// Always makes progress: the loop stops at end of input, and a character
// that can neither continue the list nor start a selector component is
// skipped instead of being looked at forever.
QVector<SimpleSelector> CSSParser::parseSimpleSelectors(){
    QVector<SimpleSelector> selectors;
    while (true) {
        consumeWhitespaceOrNewline();
        selectors.append(parseSimpleSelector());
        consumeWhitespaceOrNewline();

        if (eof()) {
            qDebug() << "Unexpected end of input in selector list";
            break;
        }

        QChar c = peekChar();
        if (c == ',') {
            consumeChar();
        } else if (c == '{') {
            break;
        } else {
            qDebug() << "Illegal character in selector list";
            // Characters parseSimpleSelector() can consume are left for the
            // next iteration; anything else is dropped here.
            const bool startsSelector =
                (c == '#' || c == '.' || c == '*' || isVaidIdentifier(c));
            if (!startsSelector) {
                consumeChar();
            }
        }
    }
    //todo sort selectors by specificity
    return selectors;
}
/**
 * Reads one whitespace-delimited word into buf.
 *
 * Leading whitespace is skipped. Characters are consumed up to the next
 * whitespace character or the end of input; those that do not fit into
 * size - 1 bytes are consumed but dropped. buf is NUL-terminated whenever
 * size > 0.
 *
 * @param buf   destination buffer.
 * @param size  capacity of buf in bytes, including the terminator.
 * @return true if at least one character was stored; false otherwise, or if
 *         the error state was already set.
 */
bool readString( char* buf, int size )
{
    if ( err )
        return false;
    skipWhitespace();

    int count = 0;
    char ch;
    while ( peekChar(&ch) )
    {
        // Cast before isspace(): a negative char value is undefined behaviour.
        if ( isspace(static_cast<unsigned char>(ch)) )
            break;
        if ( count+1 < size )
            buf[count++] = ch;
        readChar( &ch );
    }

    if ( size > 0 )
        buf[count] = 0;
    return count > 0;
}
static void peekId(void) { current.kind = tID; int len = 0; while (1) { const int c = peekChar(); switch (c) { case 'a' ... 'z': case '0' ... '9': consumeChar(); len++; current.ptr = realloc(current.ptr, len + 1); current.ptr[len - 1] = c; current.ptr[len] = 0; break; default: if (strcmp(current.ptr, "if") == 0) { current.kind = tIF; } else if (strcmp(current.ptr, "else") == 0) { current.kind = tELSE; } else if (strcmp(current.ptr, "while") == 0) { current.kind = tWHILE; } else if (strcmp(current.ptr, "print") == 0) { current.kind = tPRINT; } else if (strcmp(current.ptr, "fun") == 0) { current.kind = tFUN; } else if (strcmp(current.ptr, "return") == 0) { current.kind = tRETURN; } else { current.kind = tID; } if (current.kind != tID) { free(current.ptr); current.ptr = 0; } return; } } }
void Scanner::handleFraction() { // currentChar_ is . (dot) // if we have number 4..12. just simple error condition. // our compiler has one big difference compared with // commercial compiler, that is about error conditions. if (peekChar() == '.') { errorToken(getTokenLocation().toString() + "Fraction number can not have dot after dot"); errorFlag_ = true; } // eat . addToBuffer(currentChar_); getNextChar(); while (std::isdigit(currentChar_)) { addToBuffer(currentChar_); getNextChar(); } }
// Re-lexes a preprocessor-number token into its real kind.
//
// Does nothing unless token.getNumber () is PP_NUMBER_TOKEN. Otherwise the
// lexeme is walked one character at a time through the state machine below;
// every accepted character is pushed to the ident table. Scanning stops as
// soon as a state decides the kind (ZERO_TOKEN, LITERAL_INT_TOKEN or
// LITERAL_FLOAT_TOKEN). A missing hex digit or exponent is reported and
// repaired by pushing a '0'. If the rebuilt lexeme differs from the original
// one, leftover characters are reported as an invalid suffix and the token's
// lexeme is replaced. Finally the token number is set to the kind found.
void lexPPNumber (basl::Token & token)
{
  // do nothing if not a pp-number
  if (token.getNumber () != PP_NUMBER_TOKEN)
  {
    return;
  }

  // lex states
  enum
  {
    // start
    S_START,
    // zero
    S_ZERO,
    // zero followed by X
    S_ZERO_X,
    // octal integer
    S_OCTAL_INT,
    // hex integer
    S_HEX_INT,
    // integer
    S_INT,
    // integer with l suffix
    S_INT_L_SUFFIX,
    // integer with u suffix
    S_INT_U_SUFFIX,
    // integer with ul suffix
    S_INT_UL_SUFFIX,
    // integer with ll suffix
    S_INT_LL_SUFFIX,
    // integer with i suffix, 64 next
    S_INT_I_SUFFIX,
    // integer with i6 suffix, 4 next
    S_INT_I6_SUFFIX,
    // integer with ui suffix, 64 next
    S_INT_UI_SUFFIX,
    // integer with ui6 suffix, 4 next
    S_INT_UI6_SUFFIX,
    // integer with suffix
    S_INT_SUFFIX,
    // float
    S_FLOAT,
    // float followed by E
    S_FLOAT_E,
    // float followed by E and sign
    S_FLOAT_E_SIGN,
    // float exponent (number)
    S_FLOAT_EXP,
    // float followed by suffix
    S_FLOAT_SUFFIX,
    // .
    S_DOT
  };

  util::IdentTable & ident_table = util::getIdentTable ();
  util::Loc const & loc = token.getLoc ();
  // cursor into the original lexeme
  char const * s = token.getLexeme ().c_str ();
  // true if octal error
  bool octal_error = false;
  // token kind decided so far; 0 means "not decided yet"
  int kind = 0;
  // true if message output while lexing (set below, not read in this function)
  bool error = false;
  int state = S_START;
  for (;;)
  {
    char ch = * s;
    switch (state)
    {
      // start
      case S_START:
      {
        if (ch == '0')
        {
          state = S_ZERO;
        }
        else if (ch >= '1' && ch <= '9')
        {
          state = S_INT;
        }
        else
        {
          assert (ch == '.');
          state = S_DOT;
        }
        break;
      }
      // zero
      case S_ZERO:
      {
        if (ch == 'x' || ch == 'X')
        {
          state = S_ZERO_X;
        }
        else if (ch == '.')
        {
          state = S_FLOAT;
        }
        else if (ch == 'E' || ch == 'e')
        {
          state = S_FLOAT_E;
        }
        else if (ch == 'L' || ch == 'l')
        {
          state = S_INT_L_SUFFIX;
        }
        else if (ch == 'U' || ch == 'u')
        {
          state = S_INT_U_SUFFIX;
        }
        else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
        {
          state = S_INT_I_SUFFIX;
        }
        else if (ch >= '0' && ch <= '7')
        {
          state = S_OCTAL_INT;
        }
        else if (ch >= '8' && ch <= '9')
        {
          // can't be an octal, maybe float
          octal_error = true;
          state = S_INT;
        }
        else
        {
          // this is a *zero* int literal token
          kind = ZERO_TOKEN;
        }
        break;
      }
      // zero followed by x
      case S_ZERO_X:
      {
        if (ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F')
        {
          state = S_HEX_INT;
        }
        else
        {
          // missing hex digit, append 0 to make valid
          msg::missingHexDigitsInIntLiteral (loc);
          error = true;
          // try to recover
          ident_table.push ('0');
          if (ch == 'L' || ch == 'l')
          {
            state = S_INT_L_SUFFIX;
          }
          else if (ch == 'U' || ch == 'u')
          {
            state = S_INT_U_SUFFIX;
          }
          else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
          {
            state = S_INT_I_SUFFIX;
          }
          else
          {
            kind = LITERAL_INT_TOKEN;
          }
        }
        break;
      }
      // hexadecimal integer
      case S_HEX_INT:
      {
        if (ch == 'L' || ch == 'l')
        {
          state = S_INT_L_SUFFIX;
        }
        else if (ch == 'U' || ch == 'u')
        {
          state = S_INT_U_SUFFIX;
        }
        else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
        {
          state = S_INT_I_SUFFIX;
        }
        else if (ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F')
        {
          // stay in same state
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // octal integer
      case S_OCTAL_INT:
      {
        if (ch == '.')
        {
          state = S_FLOAT;
        }
        else if (ch == 'E' || ch == 'e')
        {
          state = S_FLOAT_E;
        }
        else if (ch == 'L' || ch == 'l')
        {
          state = S_INT_L_SUFFIX;
        }
        else if (ch == 'U' || ch == 'u')
        {
          state = S_INT_U_SUFFIX;
        }
        else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
        {
          state = S_INT_I_SUFFIX;
        }
        else if (ch >= '0' && ch <= '7')
        {
          // stay in same state
        }
        else if (ch >= '8' && ch <= '9')
        {
          // we've started as an octal but now have an 8 or a 9, if this token isn't a float then it's
          // an invalid octal int
          octal_error = true;
          state = S_INT;
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // decimal integer
      case S_INT:
      {
        if (ch == '.')
        {
          state = S_FLOAT;
        }
        else if (ch == 'E' || ch == 'e')
        {
          state = S_FLOAT_E;
        }
        else if (ch == 'L' || ch == 'l')
        {
          state = S_INT_L_SUFFIX;
        }
        else if (ch == 'U' || ch == 'u')
        {
          state = S_INT_U_SUFFIX;
        }
        else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
        {
          state = S_INT_I_SUFFIX;
        }
        else if (ch >= '0' && ch <= '9')
        {
          // stay in same state
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // integer with l suffix
      case S_INT_L_SUFFIX:
      {
        if (ch == 'U' || ch == 'u')
        {
          // 'lu' suffix so an int, we'll not allow an 'lul' suffix for 'unsigned long long'
          state = S_INT_SUFFIX;
        }
        else if (ch == 'L' || ch == 'l')
        {
          state = S_INT_LL_SUFFIX;
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // integer with u suffix
      case S_INT_U_SUFFIX:
      {
        if (ch == 'L' || ch == 'l')
        {
          // could still be an 'ull' suffix
          state = S_INT_UL_SUFFIX;
        }
        else if (ch == 'i' && peekChar (s, 1) == '6' && peekChar (s, 2) == '4')
        {
          state = S_INT_UI_SUFFIX;
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // integer with ul suffix
      case S_INT_UL_SUFFIX:
      {
        if (ch == 'L' || ch == 'l')
        {
          state = S_INT_SUFFIX;
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // integer with ll suffix
      case S_INT_LL_SUFFIX:
      {
        if (ch == 'U' || ch == 'u')
        {
          state = S_INT_SUFFIX;
        }
        else
        {
          kind = LITERAL_INT_TOKEN;
        }
        break;
      }
      // integer with i suffix, 64 next
      // (the "64" was already verified by the peekChar look-ahead that led here)
      case S_INT_I_SUFFIX:
      {
        state = S_INT_I6_SUFFIX;
        break;
      }
      // integer with i6 suffix, 4 next
      case S_INT_I6_SUFFIX:
      {
        state = S_INT_LL_SUFFIX;
        break;
      }
      // integer with ui suffix, 64 next
      case S_INT_UI_SUFFIX:
      {
        state = S_INT_UI6_SUFFIX;
        break;
      }
      // integer with ui6 suffix, 4 next
      case S_INT_UI6_SUFFIX:
      {
        state = S_INT_SUFFIX;
        break;
      }
      // int with suffix
      case S_INT_SUFFIX:
      {
        kind = LITERAL_INT_TOKEN;
        break;
      }
      // float
      case S_FLOAT:
      {
        if (ch == 'E' || ch == 'e')
        {
          state = S_FLOAT_E;
        }
        else if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
        {
          state = S_FLOAT_SUFFIX;
        }
        else if (ch >= '0' && ch <= '9')
        {
          // stay in same state
        }
        else
        {
          kind = LITERAL_FLOAT_TOKEN;
        }
        break;
      }
      // float with e
      case S_FLOAT_E:
      {
        if (ch == '+' || ch == '-')
        {
          state = S_FLOAT_E_SIGN;
        }
        else if (ch >= '0' && ch <= '9')
        {
          state = S_FLOAT_EXP;
        }
        else
        {
          // missing exponent, append 0 to make valid
          msg::missingExponentInFloatingLiteral (loc);
          error = true;
          // try to recover
          ident_table.push ('0');
          if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
          {
            state = S_FLOAT_SUFFIX;
          }
          else
          {
            kind = LITERAL_FLOAT_TOKEN;
          }
        }
        break;
      }
      // float with e and sign
      case S_FLOAT_E_SIGN:
      {
        if (ch >= '0' && ch <= '9')
        {
          state = S_FLOAT_EXP;
        }
        else
        {
          // missing exponent, append 0 to make valid
          msg::missingExponentInFloatingLiteral (loc);
          error = true;
          // try to recover
          ident_table.push ('0');
          if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
          {
            state = S_FLOAT_SUFFIX;
          }
          else
          {
            kind = LITERAL_FLOAT_TOKEN;
          }
        }
        break;
      }
      // float exponent
      case S_FLOAT_EXP:
      {
        if (ch == 'F' || ch == 'f' || ch == 'L' || ch == 'l')
        {
          state = S_FLOAT_SUFFIX;
        }
        else if (ch >= '0' && ch <= '9')
        {
          // stay in same state
        }
        else
        {
          kind = LITERAL_FLOAT_TOKEN;
        }
        break;
      }
      // float with suffix
      case S_FLOAT_SUFFIX:
      {
        kind = LITERAL_FLOAT_TOKEN;
        break;
      }
      // dot
      case S_DOT:
      {
        assert (ch >= '0' && ch <= '9');
        state = S_FLOAT;
        break;
      }
    }
    // kind decided: the current character is not part of the lexeme
    if (kind != 0)
    {
      break;
    }
    // char part of lexeme
    ident_table.push (ch);
    ++ s;
  }

  // warn if octal error
  util::Ident lexeme = ident_table.getIdent ();
  if (kind == LITERAL_INT_TOKEN && octal_error)
  {
    msg::octalHasDigitsBeyondRadix (loc, lexeme);
  }

  // set the lexeme if not the same
  if (lexeme != token.getLexeme ())
  {
    // any chars remaining?
    if (* s != 0)
    {
      if (kind == LITERAL_FLOAT_TOKEN)
      {
        msg::invalidFloatingLiteralSuffix (loc, s, lexeme);
      }
      else
      {
        // otherwise literal int or literal zero token
        msg::invalidIntLiteralSuffix (loc, s, lexeme);
      }
    }
    // change the lexeme so expr will be able to correctly get its value
    token.setLexeme (lexeme);
  }

  // and finally, set its real kind
  token.setNumber (kind);
}
/*
 * Returns the next token of the source being scanned.
 *
 * Works on the file-level scanner state: yyCh is the current character,
 * yyIdent/yyString receive the text of identifiers and string literals,
 * yyInteger receives integer values. Identifiers are matched against the
 * translation-related names handled below; string literals are decoded
 * (escapes, triple quotes, optional codec conversion) into yyString.
 * Characters that start no token are skipped. Returns Tok_Eof when yyCh
 * reaches EOF.
 */
static int getToken()
{
    // escape letters and, at the same index, the characters they stand for
    const char tab[] = "abfnrtv";
    const char backTab[] = "\a\b\f\n\r\t\v";
    uint n;
    // true once a warning was printed for the current string literal
    bool quiet;

    yyIdentLen = 0;
    yyCommentLen = 0;
    yyStringLen = 0;

    while ( yyCh != EOF ) {
        yyLineNo = yyCurLineNo;

        if ( isalpha(yyCh) || yyCh == '_' ) {
            // collect the identifier; characters beyond the buffer are dropped
            do {
                if ( yyIdentLen < sizeof(yyIdent) - 1 )
                    yyIdent[yyIdentLen++] = (char) yyCh;
                yyCh = getChar();
            } while ( isalnum(yyCh) || yyCh == '_' );
            yyIdent[yyIdentLen] = '\0';

            // set when the identifier could be a string prefix (r, b, u, br, ...)
            bool might_be_str = false;

            // dispatch on the first letter, compare the rest
            switch ( yyIdent[0] ) {
            case 'N':
                if ( strcmp(yyIdent + 1, "one") == 0 )
                    return Tok_None;
                break;
            case 'Q':
                if (strcmp(yyIdent + 1, "T_TR_NOOP") == 0) {
                    yyParsingUtf8 = false;
                    return Tok_tr;
                } else if (strcmp(yyIdent + 1, "T_TR_NOOP_UTF8") == 0) {
                    yyParsingUtf8 = true;
                    return Tok_trUtf8;
                } else if (strcmp(yyIdent + 1, "T_TRANSLATE_NOOP") == 0) {
                    yyParsingUtf8 = false;
                    return Tok_translate;
                }
                break;
            case 'c':
                if ( strcmp(yyIdent + 1, "lass") == 0 )
                    return Tok_class;
                break;
            case 'f':
                /*
                 * QTranslator::findMessage() has the same parameters as
                 * QApplication::translate().
                 */
                if ( strcmp(yyIdent + 1, "indMessage") == 0 )
                    return Tok_translate;
                break;
            case 'r':
                if ( strcmp(yyIdent + 1, "eturn") == 0 )
                    return Tok_return;
                /* Drop through. */
            case 'R':
                if (yyIdent[1] == '\0')
                    might_be_str = true;
                break;
            case 'b':
            case 'B':
            case 'u':
            case 'U':
                if (yyIdent[1] == '\0')
                    might_be_str = true;
                else if ((yyIdent[1] == 'r' || yyIdent[1] == 'R') && yyIdent[2] == '\0')
                    might_be_str = true;
                break;
            case 't':
                if ( strcmp(yyIdent + 1, "r") == 0 ) {
                    yyParsingUtf8 = false;
                    return Tok_tr;
                } else if ( qstrcmp(yyIdent + 1, "rUtf8") == 0 ) {
                    yyParsingUtf8 = true;
                    return Tok_trUtf8;
                } else if ( qstrcmp(yyIdent + 1, "ranslate") == 0 ) {
                    yyParsingUtf8 = false;
                    return Tok_translate;
                }
                break;
            case '_':
                if ( strcmp(yyIdent + 1, "_tr") == 0 ) {
                    yyParsingUtf8 = false;
                    return Tok_tr;
                } else if ( strcmp(yyIdent + 1, "_trUtf8") == 0 ) {
                    yyParsingUtf8 = true;
                    return Tok_trUtf8;
                } else if ( qstrcmp(yyIdent + 1, "translate") == 0 ) {
                    yyParsingUtf8 = false;
                    return Tok_translate;
                }
                break;
            }

            /*
             * Handle the standard Python v2 and v3 string prefixes by simply
             * ignoring them.
             */
            if (!might_be_str)
                return Tok_Ident;

            // a possible prefix not followed by a quote is a plain identifier
            if (yyCh != '"' && yyCh != '\'')
                return Tok_Ident;
        }
        {
            switch ( yyCh ) {
            case '#':
                // comment: skip to the end of the line
                do {
                    yyCh = getChar();
                } while ( yyCh != EOF && yyCh != '\n' );
                break;
            case '"':
            case '\'':
                int quoteChar;
                // trippelQuote: inside a triple-quoted string;
                // singleQuote: still treated as a single-quoted string
                int trippelQuote, singleQuote;
                // set once the string is known to have content or to be triple-quoted
                int in;

                quoteChar = yyCh;
                trippelQuote = 0;
                singleQuote = 1;
                in = 0;
                yyCh = getChar();
                quiet = false;

                while ( yyCh != EOF ) {
                    // a single-quoted string ends at a newline or, once it
                    // has content, at its closing quote
                    if ( singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)) )
                        break;

                    if ( yyCh == quoteChar ) {
                        if (peekChar() == quoteChar) {
                            // two quote characters in a row
                            yyCh = getChar();
                            if (!trippelQuote) {
                                // switch to triple-quote mode and step past the run
                                trippelQuote = 1;
                                singleQuote = 0;
                                in = 1;
                                yyCh = getChar();
                            } else {
                                // inside a triple-quoted string: a third
                                // quote closes it
                                yyCh = getChar();
                                if (yyCh == quoteChar) {
                                    trippelQuote = 0;
                                    break;
                                }
                            }
                        } else if (trippelQuote) {
                            // lone quote inside a triple-quoted string is content
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) yyCh;
                            yyCh = getChar();
                            continue;
                        } else
                            break;
                    } else
                        in = 1;

                    if ( yyCh == '\\' ) {
                        yyCh = getChar();

                        if ( yyCh == 'x' ) {
                            // \x followed by hex digits
                            QByteArray hex = "0";

                            yyCh = getChar();
                            while ( isxdigit(yyCh) ) {
                                hex += (char) yyCh;
                                yyCh = getChar();
                            }
#if defined(_MSC_VER) && _MSC_VER >= 1400
                            sscanf_s( hex, "%x", &n );
#else
                            sscanf( hex, "%x", &n );
#endif
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) n;
                        } else if ( yyCh >= '0' && yyCh < '8' ) {
                            // octal escape, at most three digits; this n
                            // shadows the outer one and first counts digits,
                            // then receives the parsed value
                            QByteArray oct = "";
                            int n = 0;

                            do {
                                oct += (char) yyCh;
                                ++n;
                                yyCh = getChar();
                            } while ( yyCh >= '0' && yyCh < '8' && n < 3 );
#if defined(_MSC_VER) && _MSC_VER >= 1400
                            sscanf_s( oct, "%o", &n );
#else
                            sscanf( oct, "%o", &n );
#endif
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) n;
                        } else if ( yyCh == '\n' ) {
                            // backslash-newline: line continuation, nothing stored
                            yyCh = getChar();
                        } else {
                            // letter escapes are mapped through tab/backTab;
                            // any other character is stored as it is
                            const char *p = strchr( tab, yyCh );
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = ( p == 0 ) ?
                                        (char) yyCh : backTab[p - tab];
                            yyCh = getChar();
                        }
                    } else {
                        if (!yyCodecForSource) {
                            if ( yyParsingUtf8 && yyCh >= 0x80 && !quiet) {
                                qWarning( "%s:%d: Non-ASCII character detected in trUtf8 string",
                                          (const char *) yyFileName, yyLineNo );
                                quiet = true;
                            }
                            // common case: optimized
                            if ( yyStringLen < sizeof(yyString) - 1 )
                                yyString[yyStringLen++] = (char) yyCh;
                            yyCh = getChar();
                        } else {
                            // collect a run of plain bytes and convert it from
                            // the source codec to the translation codec
                            QByteArray originalBytes;
                            while ( yyCh != EOF && (trippelQuote || yyCh != '\n') && yyCh != quoteChar && yyCh != '\\' ) {
                                if ( yyParsingUtf8 && yyCh >= 0x80 && !quiet) {
                                    qWarning( "%s:%d: Non-ASCII character detected in trUtf8 string",
                                              (const char *) yyFileName, yyLineNo );
                                    quiet = true;
                                }
                                originalBytes += (char)yyCh;
                                yyCh = getChar();
                            }

                            QString unicodeStr = yyCodecForSource->toUnicode(originalBytes);
                            QByteArray convertedBytes;

                            if (!yyCodecForTr->canEncode(unicodeStr) && !quiet) {
                                qWarning( "%s:%d: Cannot convert Python string from %s to %s",
                                          (const char *) yyFileName, yyLineNo, yyCodecForSource->name().constData(),
                                          yyCodecForTr->name().constData() );
                                quiet = true;
                            }
                            convertedBytes = yyCodecForTr->fromUnicode(unicodeStr);

                            // copy only what still fits, leaving room for the terminator
                            size_t len = qMin((size_t)convertedBytes.size(), sizeof(yyString) - yyStringLen - 1);
                            memcpy(yyString + yyStringLen, convertedBytes.constData(), len);
                            yyStringLen += len;
                        }
                    }
                }
                yyString[yyStringLen] = '\0';

                // the loop ended on something other than the closing quote
                if ( yyCh != quoteChar ) {
                    if (trippelQuote)
                        qWarning("%s:%d: Empty or unterminated triple quoted string",
                                 (const char *)yyFileName, yyLineNo);
                    else
                        qWarning("%s:%d: Unterminated string",
                                 (const char *)yyFileName, yyLineNo);
                }

                if ( yyCh == EOF ) {
                    return Tok_Eof;
                } else {
                    yyCh = getChar();
                    return Tok_String;
                }
                break;

            case '(':
                // remember the line on which the outermost parenthesis opened
                if (yyParenDepth == 0)
                    yyParenLineNo = yyCurLineNo;
                yyParenDepth++;
                yyCh = getChar();
                return Tok_LeftParen;
            case ')':
                if (yyParenDepth == 0)
                    yyParenLineNo = yyCurLineNo;
                yyParenDepth--;
                yyCh = getChar();
                return Tok_RightParen;
            case ',':
                yyCh = getChar();
                return Tok_Comma;
            case '.':
                yyCh = getChar();
                return Tok_Dot;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                {
                    // integer literal; an 'x' right after the first digit
                    // switches to hex digits
                    QByteArray ba;
                    ba+=yyCh;
                    yyCh = getChar();
                    bool hex = yyCh == 'x';
                    if ( hex ) {
                        ba+=yyCh;
                        yyCh = getChar();
                    }
                    while ( (hex ? isxdigit(yyCh) : isdigit(yyCh)) ) {
                        ba+=yyCh;
                        yyCh = getChar();
                    }
                    bool ok;
                    yyInteger = ba.toLongLong(&ok);
                    if (ok)
                        return Tok_Integer;
                    // conversion failed: no token, scanning continues
                    break;
                }
            default:
                // any other character is skipped
                yyCh = getChar();
            }
        }
    }
    return Tok_Eof;
}
/**
 * Returns the next JSON token.
 *
 * Leading whitespace is skipped. A '"' starts a string whose content (with
 * backslash escapes kept verbatim) becomes the token value. The six
 * punctuation characters map to their own token types. Anything else is
 * read as a bare value made of "value letters", which is lower-cased.
 *
 * A bare value that is the very last character of the input is still
 * returned as a VALUE: the check for "input exhausted" happens after the
 * character has already been consumed, so it must not discard that
 * character.
 *
 * @return the token; *finish_token if the input was already exhausted.
 * @throws KevoreeException if the input ends right after an opening '"'.
 */
Token Lexer::nextToken()
{
    if (isDone())
    {
        return *finish_token;
    }

    int tokenType = END_OF_FILE;
    char c = nextChar();
    string currentValue = "";
    string jsonValue = "";

    while (isDone() != true && isSpace(c) == true)
    {
        c = nextChar();
    }

    if ('"' == c)
    {
        tokenType = VALUE;
        if (isDone() != true)
        {
            c = nextChar();
            while (index < length && c != '"')
            {
                currentValue += c;
                // keep the escaped character together with its backslash
                if (c == '\\' && index < length)
                {
                    c = nextChar();
                    currentValue += c;
                }
                c = nextChar();
            }
            jsonValue = currentValue;
        }
        else
        {
            throw KevoreeException("Lexer Unterminated string nextToken");
        }
    }
    else if ('{' == c)
    {
        tokenType = LEFT_BRACE;
    }
    else if ('}' == c)
    {
        tokenType = RIGHT_BRACE;
    }
    else if ('[' == c)
    {
        tokenType = LEFT_BRACKET;
    }
    else if (']' == c)
    {
        tokenType = RIGHT_BRACKET;
    }
    else if (':' == c)
    {
        tokenType = COLON;
    }
    else if (',' == c)
    {
        tokenType = COMMA;
    }
    else if (!isDone() || isValueLetter(tolower(c)) == true)
    {
        while (isValueLetter(tolower(c)) == true)
        {
            currentValue += c;
            // stop at the end of the input or before a non-value character
            if (isDone() || isValueLetter(tolower(peekChar())) != true)
            {
                break;
            }
            c = nextChar();
        }

        string v = currentValue;
        std::transform(v.begin(), v.end(), v.begin(), ::tolower);
        if (v.compare("true") == 0)
        {
            jsonValue = "true";
        }
        else if (v.compare("false") == 0)
        {
            jsonValue = "false";
        }
        else
        {
            jsonValue = v;
        }
        tokenType = VALUE;
    }
    else
    {
        tokenType = END_OF_FILE;
    }

    return Token(tokenType, jsonValue);
}
/**
 * Lexes the next token, stores it in token_ and returns it.
 *
 * Whitespace and comments are skipped first; FlowToken::Eof is returned when
 * the input is exhausted. Operators are recognised by one character of
 * look-ahead. A '/' starts a regular expression when the previous token was
 * an identifier or an operator, and is the division operator otherwise.
 * '"' opens an interpolated string, '}' continues one while
 * interpolationDepth_ is non-zero. Unknown characters are reported on
 * stdout, skipped and returned as FlowToken::Unknown.
 */
FlowToken FlowLexer::nextToken()
{
    bool expectsValue = token() == FlowToken::Ident || FlowTokenTraits::isOperator(token());

    lastPos_ = currPos_;

    if (consumeSpace())
        return token_ = FlowToken::Eof;

    // printf("FlowLexer.nextToken: currentChar %s curr[%zu:%zu.%zu] next[%zu:%zu.%zu]\n",
    //     escape(currentChar_).c_str(),
    //     currPos_.line, currPos_.column, currPos_.offset,
    //     nextPos_.line, nextPos_.column, nextPos_.offset);

    content_.clear();
    content_ += static_cast<char>(currentChar_);
    lastLocation_ = currLocation_;
    currLocation_.begin = currPos_;

    switch (currentChar_) {
    case EOF: // (-1)
        return token_ = FlowToken::Eof;
    case '=':
        switch (nextChar()) {
        case '=':
            nextChar();
            return token_ = FlowToken::Equal;
        case '^':
            nextChar();
            return token_ = FlowToken::PrefixMatch;
        case '$':
            nextChar();
            return token_ = FlowToken::SuffixMatch;
        case '~':
            nextChar();
            return token_ = FlowToken::RegexMatch;
        case '>':
            nextChar();
            return token_ = FlowToken::HashRocket;
        default:
            return token_ = FlowToken::Assign;
        }
    case '<':
        switch (nextChar()) {
        case '<':
            nextChar();
            return token_ = FlowToken::Shl;
        case '=':
            nextChar();
            return token_ = FlowToken::LessOrEqual;
        default:
            return token_ = FlowToken::Less;
        }
    case '>':
        switch (nextChar()) {
        case '>':
            nextChar();
            return token_ = FlowToken::Shr;
        case '=':
            nextChar();
            return token_ = FlowToken::GreaterOrEqual;
        default:
            return token_ = FlowToken::Greater;
        }
    case '|':
        switch (nextChar()) {
        case '|':
            nextChar();
            return token_ = FlowToken::Or;
        case '=':
            nextChar();
            return token_ = FlowToken::OrAssign;
        default:
            return token_ = FlowToken::BitOr;
        }
    case '&':
        switch (nextChar()) {
        case '&':
            nextChar();
            return token_ = FlowToken::And;
        case '=':
            nextChar();
            return token_ = FlowToken::AndAssign;
        default:
            return token_ = FlowToken::BitAnd;
        }
    case '.':
        if (nextChar() == '.') {
            if (nextChar() == '.') {
                nextChar();
                return token_ = FlowToken::Ellipsis;
            }
            return token_ = FlowToken::DblPeriod;
        }
        return token_ = FlowToken::Period;
    case ':':
        // "::" can only be the start of an IPv6 literal
        if (peekChar() == ':') {
            stringValue_.clear();
            return continueParseIPv6(false);
        } else {
            nextChar();
            return token_ = FlowToken::Colon;
        }
    case ';':
        nextChar();
        return token_ = FlowToken::Semicolon;
    case ',':
        nextChar();
        return token_ = FlowToken::Comma;
    case '{':
        nextChar();
        return token_ = FlowToken::Begin;
    case '}':
        if (interpolationDepth_) {
            return token_ = parseInterpolationFragment(false);
        } else {
            nextChar();
            return token_ = FlowToken::End;
        }
    case '(':
        nextChar();
        return token_ = FlowToken::RndOpen;
    case ')':
        nextChar();
        return token_ = FlowToken::RndClose;
    case '[':
        nextChar();
        return token_ = FlowToken::BrOpen;
    case ']':
        nextChar();
        return token_ = FlowToken::BrClose;
    case '+':
        nextChar();
        return token_ = FlowToken::Plus;
    case '-':
        nextChar();
        return token_ = FlowToken::Minus;
    case '*':
        switch (nextChar()) {
        case '*':
            // Consume the second '*' only. Lexing a whole token here would
            // overwrite the content and location recorded above.
            nextChar();
            return token_ = FlowToken::Pow;
        default:
            return token_ = FlowToken::Mul;
        }
    case '/':
        if (expectsValue)
            return token_ = parseString('/', FlowToken::RegExp);

        nextChar();
        return token_ = FlowToken::Div;
    case '%':
        nextChar();
        return token_ = FlowToken::Mod;
    case '!':
        switch (nextChar()) {
        case '=':
            nextChar();
            return token_ = FlowToken::UnEqual;
        default:
            return token_ = FlowToken::Not;
        }
    case '\'':
        return token_ = parseString(true);
    case '"':
        ++interpolationDepth_;
        return token_ = parseInterpolationFragment(true);
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return parseNumber();
    default:
        if (std::isalpha(currentChar()) || currentChar() == '_')
            return token_ = parseIdent();

        if (std::isprint(currentChar()))
            printf("lexer: unknown char %c (0x%02X)\n", currentChar(), currentChar());
        else
            printf("lexer: unknown char %u (0x%02X)\n", currentChar() & 0xFF, currentChar() & 0xFF);

        nextChar();
        return token_ = FlowToken::Unknown;
    }
}
/** * \retval true abort tokenizing in caller * \retval false continue tokenizing in caller */ bool FlowLexer::consumeSpace() { // skip spaces for (;; nextChar()) { if (eof()) return true; if (std::isspace(currentChar_)) continue; if (std::isprint(currentChar_)) break; // TODO proper error reporting through API callback std::fprintf(stderr, "%s[%04zu:%02zu]: invalid byte %d (0x%02X)\n", currLocation_.fileName.c_str(), nextPos_.line, nextPos_.column, currentChar() & 0xFF, currentChar() & 0xFF); } if (eof()) return true; if (currentChar() == '#') { // skip chars until EOL for (;;) { if (eof()) { token_ = FlowToken::Eof; return true; } if (currentChar() == '\n') { nextChar(); return consumeSpace(); } nextChar(); } } if (currentChar() == '/' && peekChar() == '*') { // "/*" ... "*/" // parse multiline comment nextChar(); for (;;) { if (eof()) { token_ = FlowToken::Eof; // reportError(Error::UnexpectedEof); return true; } if (currentChar() == '*' && peekChar() == '/') { nextChar(); // skip '*' nextChar(); // skip '/' break; } nextChar(); } return consumeSpace(); } return false; }