// Appends a token of the given category to the token list.
// The token's text is sliced out of filename_ using the byte range in `range`;
// NOTE(review): despite its name, filename_ appears to hold the text being
// tokenized — confirm against the class definition.
void Tokenizer::AddToken(TokenCategory category, bool enclosed, const TokenRange& range) {
    auto text = filename_.substr(range.offset, range.size);
    tokens_.push_back(Token(category, text, enclosed));
}
// Emits the next token from the map file stream.
//
// Recognizes: // comments (skipped), braces/parens/brackets, quoted strings,
// integers, decimals, and bare words. `line`/`column` are the position of the
// token's first character, supplied by the caller; `startPosition` anchors the
// token's length computation.
//
// Returns an Eof token when the stream is exhausted.
Token MapTokenEmitter::doEmit(Tokenizer& tokenizer, size_t line, size_t column) {
    const size_t startPosition = tokenizer.position();
    while (!tokenizer.eof()) {
        char c = tokenizer.nextChar();
        switch (c) {
            case '/':
                if (tokenizer.peekChar() == '/') {
                    // eat everything up to and including the next newline
                    // NOTE(review): this inner loop does not test eof(); a
                    // trailing comment with no final newline may read past the
                    // end — confirm nextChar()'s behavior at eof.
                    while (tokenizer.nextChar() != '\n');
                }
                // NOTE(review): a lone '/' (not followed by '/') is silently
                // dropped here — confirm intended.
                break;
            case '{': return Token(TokenType::OBrace, "", startPosition, tokenizer.position() - startPosition, line, column);
            case '}': return Token(TokenType::CBrace, "", startPosition, tokenizer.position() - startPosition, line, column);
            case '(': return Token(TokenType::OParenthesis, "", startPosition, tokenizer.position() - startPosition, line, column);
            case ')': return Token(TokenType::CParenthesis, "", startPosition, tokenizer.position() - startPosition, line, column);
            case '[': return Token(TokenType::OBracket, "", startPosition, tokenizer.position() - startPosition, line, column);
            case ']': return Token(TokenType::CBracket, "", startPosition, tokenizer.position() - startPosition, line, column);
            case '"': // quoted string: everything up to the closing quote (quotes excluded from text)
                m_buffer.str(String());
                while (!tokenizer.eof() && (c = tokenizer.nextChar()) != '"')
                    m_buffer << c;
                return Token(TokenType::String, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
            default: // whitespace, integer, decimal or word
                if (isWhitespace(c))
                    break;
                // clear the buffer
                m_buffer.str(String());
                // try to read a number; if the digits are not followed by a
                // delimiter, fall through and treat the input as a word
                if (c == '-' || isDigit(c)) {
                    m_buffer << c;
                    while (isDigit((c = tokenizer.nextChar())))
                        m_buffer << c;
                    if (isDelimiter(c)) {
                        // un-read the delimiter so the next emit sees it
                        if (!tokenizer.eof())
                            tokenizer.pushChar();
                        return Token(TokenType::Integer, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
                    }
                }
                // try to read a decimal (may start with '.')
                if (c == '.') {
                    m_buffer << c;
                    while (isDigit((c = tokenizer.nextChar())))
                        m_buffer << c;
                    if (isDelimiter(c)) {
                        if (!tokenizer.eof())
                            tokenizer.pushChar();
                        return Token(TokenType::Decimal, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
                    }
                }
                // read a word: accumulate characters already buffered plus
                // everything up to the next delimiter
                m_buffer << c;
                while (!tokenizer.eof() && !isDelimiter(c = tokenizer.nextChar()))
                    m_buffer << c;
                if (!tokenizer.eof())
                    tokenizer.pushChar();
                return Token(TokenType::String, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
        }
    }
    return Token(TokenType::Eof, "", startPosition, tokenizer.position() - startPosition, line, column);
}
static void clear(Token &t) { t = Token(); }
// Moves the stream forward one token.
// When the stream is already exhausted, currentToken is reset to the
// default-constructed sentinel (BAD_TOKEN) instead.
void TokenStream::advance() {
    if (current == end) {
        currentToken = Token(); // BAD_TOKEN
        return;
    }
    current = nextToken(current, end, currentToken);
}
// Constructs a NUMBER value node holding `value`.
// A synthetic token (source name "generated", zero position) is recorded so the
// node carries provenance like nodes built from parsed input do.
NumberValue::NumberValue(double value) {
    tokens.push_back(Token("", Token::NUMBER, 0, 0, "generated"));
    type = NUMBER;
    setValue(value);
}
// get a parsed line.
// Tokenizes one logical line (physical lines joined by a trailing backslash)
// into *o_tokens. Token kinds: comma, open/close paren, registered prefixes,
// quoted strings / regexps, numbers, and bare words. '#' starts a comment;
// blank/comment-only lines are skipped.
// if no more lines exist, returns false
bool Parser::getLine(std::vector<Token> *o_tokens) {
    o_tokens->clear();
    m_lineNumber = m_internalLineNumber;
    tstringi line;
    bool isTokenExist = false;      // was the previous token a "value" (used to insert empty tokens around commas/parens)
  continue_getLineLoop:
    while (getLine(&line)) {
        const _TCHAR *t = line.c_str();
      continue_getTokenLoop:
        while (true) {
            // skip white space
            while (*t != _T('\0') && _istspace(*t))
                t ++;
            if (*t == _T('\0') || *t == _T('#'))
                goto break_getTokenLoop; // no more tokens exist
            if (*t == _T('\\') && *(t + 1) == _T('\0'))
                goto continue_getLineLoop; // continue to next line
            const _TCHAR *tokenStart = t;
            // comma or empty token: an empty token is synthesized when two
            // separators are adjacent
            if (*t == _T(',')) {
                if (!isTokenExist)
                    o_tokens->push_back(Token(_T(""), false));
                isTokenExist = false;
                o_tokens->push_back(Token(Token::Type_comma));
                t ++;
                goto continue_getTokenLoop;
            }
            // paren
            if (*t == _T('(')) {
                o_tokens->push_back(Token(Token::Type_openParen));
                isTokenExist = false;
                t ++;
                goto continue_getTokenLoop;
            }
            if (*t == _T(')')) {
                if (!isTokenExist)
                    o_tokens->push_back(Token(_T(""), false));
                isTokenExist = true;
                o_tokens->push_back(Token(Token::Type_closeParen));
                t ++;
                goto continue_getTokenLoop;
            }
            isTokenExist = true;
            // prefix: longest-registered-prefix is NOT guaranteed — prefixes
            // are tried in registration order (case-insensitive match)
            if (m_prefixes)
                for (size_t i = 0; i < m_prefixes->size(); i ++)
                    if (_tcsnicmp(tokenStart, m_prefixes->at(i).c_str(), m_prefixes->at(i).size()) == 0) {
                        o_tokens->push_back(Token(m_prefixes->at(i), false));
                        t += m_prefixes->at(i).size();
                        goto continue_getTokenLoop;
                    }
            // quoted or regexp: "...", '...', /.../ or \m<delim>...<delim>
            if (*t == _T('"') || *t == _T('\'') || *t == _T('/') || (*t == _T('\\') && *(t + 1) == _T('m') && *(t + 2) != _T('\0'))) {
                bool isRegexp = !(*t == _T('"') || *t == _T('\''));
                _TCHAR q[2] = { *t++, _T('\0') }; // quote character
                if (q[0] == _T('\\')) {
                    // \m form: the character after 'm' becomes the delimiter
                    t++;
                    q[0] = *t++;
                }
                tokenStart = t;
                // scan to the closing delimiter, honoring backslash escapes
                // and MBCS lead bytes
                while (*t != _T('\0') && *t != q[0]) {
                    if (*t == _T('\\') && *(t + 1))
                        t ++;
                    if (_istlead(*t) && *(t + 1))
                        t ++;
                    t ++;
                }
                tstring str = interpretMetaCharacters(tokenStart, t - tokenStart, q, isRegexp);
#ifdef _MBCS
                if (isRegexp)
                    str = guardRegexpFromMbcs(str.c_str());
#endif
                // concatenate adjacent quoted strings into one token
                if (!isRegexp && 0 < o_tokens->size() && o_tokens->back().isString() && o_tokens->back().isQuoted())
                    o_tokens->back().add(str);
                else
                    o_tokens->push_back(Token(str, true, isRegexp));
                if (*t != _T('\0'))
                    t ++;
                goto continue_getTokenLoop;
            }
            // not quoted: a run of symbol characters, parsed as a number when
            // it starts like one
            {
                while (isSymbolChar(*t)) {
                    if (*t == _T('\\'))
                        if (*(t + 1))
                            t ++;
                        else
                            break;
                    if (_istlead(*t) && *(t + 1))
                        t ++;
                    t ++;
                }
                if (t == tokenStart) {
                    // nothing consumed: report the offending character
                    ErrorMessage e;
                    e << _T("invalid character ");
#ifdef UNICODE
                    e << _T("U+");
                    e << std::hex; // << std::setw(4) << std::setfill(_T('0'));
                    e << (int)(wchar_t)*t;
#else
                    e << _T("\\x");
                    e << std::hex; // << std::setw(2) << std::setfill(_T('0'));
                    e << (int)(u_char)*t;
#endif
                    e << std::dec;
                    if (_istprint(*t))
                        e << _T("(") << *t << _T(")");
                    throw e;
                }
                // number if _tcstol can parse a prefix of the run (base
                // auto-detected: 0x..., 0..., decimal); else a plain word
                _TCHAR *numEnd = NULL;
                long value = _tcstol(tokenStart, &numEnd, 0);
                if (tokenStart == numEnd) {
                    tstring str = interpretMetaCharacters(tokenStart, t - tokenStart);
                    o_tokens->push_back(Token(str, false));
                } else {
                    o_tokens->push_back(Token(value, tstringi(tokenStart, numEnd - tokenStart)));
                    t = numEnd;
                }
                goto continue_getTokenLoop;
            }
        }
      break_getTokenLoop:
        if (0 < o_tokens->size())
            break;
        // line produced no tokens (blank or comment): keep reading
        m_lineNumber = m_internalLineNumber;
        isTokenExist = false;
    }
    return 0 < o_tokens->size();
}
// Single-line comments are swallowed entirely: only FILE_END remains, and a
// comment-only input still counts as one line.
TEST_F(LexerTest, CommentsSingleLine) {
    ASSERT_EQ(getTokens("// test\n"), std::vector<Token> {Token(TT::FILE_END, "", defaultTrace)});
    ASSERT_EQ(getTokens("// asd 123 . ////**//"), std::vector<Token> {Token(TT::FILE_END, "", defaultTrace)});
    ASSERT_EQ(lx.getLineCount(), 1);
}
bool tokeniseNumeric(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok) { std::string::const_iterator t = s; // Hand constructed state machine recogniser enum { START, REJECT, DIGIT, DECIMAL_START, DECIMAL, EXPONENT_SIGN, EXPONENT_START, EXPONENT, ACCEPT_EXACT, ACCEPT_INEXACT } state = START; while (true) switch (state) { case START: if (t==e) {state = REJECT;} else if (std::isdigit(*t)) {++t; state = DIGIT;} else if (*t=='.') {++t; state = DECIMAL_START;} else state = REJECT; break; case DECIMAL_START: if (t==e) {state = REJECT;} else if (std::isdigit(*t)) {++t; state = DECIMAL;} else state = REJECT; break; case EXPONENT_SIGN: if (t==e) {state = REJECT;} else if (*t=='-' || *t=='+') {++t; state = EXPONENT_START;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} else state = REJECT; break; case EXPONENT_START: if (t==e) {state = REJECT;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} else state = REJECT; break; case DIGIT: if (t==e) {state = ACCEPT_EXACT;} else if (std::isdigit(*t)) {++t; state = DIGIT;} else if (*t=='.') {++t; state = DECIMAL;} else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;} else state = ACCEPT_EXACT; break; case DECIMAL: if (t==e) {state = ACCEPT_INEXACT;} else if (std::isdigit(*t)) {++t; state = DECIMAL;} else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;} else state = ACCEPT_INEXACT; break; case EXPONENT: if (t==e) {state = ACCEPT_INEXACT;} else if (std::isdigit(*t)) {++t; state = EXPONENT;} else state = ACCEPT_INEXACT; break; case ACCEPT_EXACT: tok = Token(T_NUMERIC_EXACT, s, t); s = t; return true; case ACCEPT_INEXACT: tok = Token(T_NUMERIC_APPROX, s, t); s = t; return true; case REJECT: return false; }; }
// Reads the next token from the stream using a small state machine.
// Handles identifiers/keywords, quoted strings, integer constants, the
// operators + - * / = ( ) ;, and // comments. Updates the global `linenum`
// on every newline read.
// NOTE(review): if EOF is reached while a token is mid-assembly (e.g. a
// number at end of input with no trailing newline), the partial lexeme is
// discarded and DONE is returned — confirm intended.
static Token realGetToken(istream* br) {
    enum LexState { BEGIN, INID, INSTRING, ININT, ONESLASH, INCOMMENT } lexstate = BEGIN;
    string lexeme;
    for(;;) {
        int ch = br->get();
        if( br->bad() || br->eof() )
            break;
        if( ch == '\n' )
            linenum++;
        switch( lexstate ) {
        case BEGIN:
            if( isspace(ch) )
                continue;
            lexeme = ch;
            if( isalpha(ch) ) {
                lexstate = INID;
            } else if( ch == '"' ) {
                lexstate = INSTRING;
            } else if( isdigit(ch) ) {
                lexstate = ININT;
            } else switch( ch ) {
                case '+': return Token::PLUSOP;
                case '-': return Token::MINUSOP;
                case '*': return Token::STAROP;
                case '/': lexstate = ONESLASH; break; // maybe a comment
                case '=': return Token::EQOP;
                case '(': return Token::LPAREN;
                case ')': return Token::RPAREN;
                case ';': return Token::SC;
                default: return Token::ERR;
            }
            break;
        case INID:
            // identifiers are letters only; a digit inside one is an error
            if( isalpha(ch) ) {
                lexeme += ch;
            } else if( isdigit(ch) ) {
                lexeme += ch;
                return Token(Token::ERR, lexeme);
            } else {
                br->putback(ch);
                return id_or_kw(lexeme);
            }
            break;
        case INSTRING:
            // strings may not span lines; closing quote ends the token
            lexeme += ch;
            if( ch == '\n' ) {
                return Token(Token::ERR, lexeme );
            }
            if( ch == '"' ) {
                return Token(Token::SCONST, lexeme );
            }
            break;
        case ININT:
            // integers are digits only; a letter inside one is an error
            if( isdigit(ch) ) {
                lexeme += ch;
            } else if( isalpha(ch) ) {
                lexeme += ch;
                return Token(Token::ERR, lexeme);
            } else {
                br->putback(ch);
                return Token(Token::ICONST, lexeme);
            }
            break;
        case ONESLASH:
            // only "//" is valid; a single '/' followed by anything else errors
            if( ch != '/' ) {
                lexeme += ch;
                return Token(Token::ERR, lexeme );
            }
            lexstate = INCOMMENT;
            break;
        case INCOMMENT:
            if( ch == '\n' ) {
                lexstate = BEGIN;
            }
            break;
        }
    }
    if( br->bad() )
        return Token::ERR;
    if( br->eof() )
        return Token::DONE;
    return Token();
}
// Determines which function call surrounds the cursor so the correct calltip
// can be shown.
//
// Strategy: read the cursor's line, split everything before the cursor into
// identifier / single-character tokens, then walk the tokens keeping a stack
// of nested call scopes (for cases like "a(x, b(), c)"). On success stores the
// function name in _funcName and the active parameter index in _currentParam,
// then (re)loads the calltip data.
//
// Returns true when a function context was found and its data loaded.
bool FunctionCallTip::getCursorFunction() {
    auto line = _pEditView->execute(SCI_LINEFROMPOSITION, _curPos);
    int startpos = static_cast<int32_t>(_pEditView->execute(SCI_POSITIONFROMLINE, line));
    int endpos = static_cast<int32_t>(_pEditView->execute(SCI_GETLINEENDPOSITION, line));
    int len = endpos - startpos + 3; //also take CRLF in account, even if not there
    int offset = _curPos - startpos; //offset is cursor location, only stuff before cursor has influence
    const int maxLen = 256;
    if ((offset < 2) || (len >= maxLen)) {
        reset();
        return false; //cannot be a func, need name and separator
    }
    TCHAR lineData[maxLen] = TEXT("");
    _pEditView->getLine(line, lineData, len);
    //line aquired, find the functionname
    //first split line into tokens to parse
    //token is identifier or some expression, whitespace is ignored
    std::vector< Token > tokenVector;
    int tokenLen = 0;
    TCHAR ch;
    for (int i = 0; i < offset; ++i)  //we dont care about stuff after the offset
    {
        //tokenVector.push_back(pair(lineData+i, len));
        ch = lineData[i];
        if (isBasicWordChar(ch) || isAdditionalWordChar(ch))  //part of identifier
        {
            // accumulate a whole identifier token
            tokenLen = 0;
            TCHAR * begin = lineData+i;
            while ((isBasicWordChar(ch) || isAdditionalWordChar(ch)) && i < offset) {
                ++tokenLen;
                ++i;
                ch = lineData[i];
            }
            tokenVector.push_back(Token(begin, tokenLen, true));
            i--;    //correct overshooting of while loop
        }
        else {
            if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')  //whitespace
            {
                //do nothing
            }
            else {
                // any other character becomes a one-character non-identifier token
                tokenLen = 1;
                tokenVector.push_back(Token(lineData+i, tokenLen, false));
            }
        }
    }
    size_t vsize = tokenVector.size();
    //mind nested funcs, like |blblb a (x, b(), c);|
    //therefore, use stack
    std::vector<FunctionValues> valueVec;
    FunctionValues curValue, newValue;
    int scopeLevel = 0;
    for (size_t i = 0; i < vsize; ++i) {
        Token & curToken = tokenVector.at(i);
        if (curToken.isIdentifier) {
            curValue.lastIdentifier = static_cast<int32_t>(i);
        }
        else {
            if (curToken.token[0] == _start)  // e.g. '(' — entering a call scope
            {
                ++scopeLevel;
                newValue = curValue;
                valueVec.push_back(newValue);  //store the current settings, so when this new function doesnt happen to be the 'real' one, we can restore everything
                curValue.scopeLevel = scopeLevel;
                if (i > 0 && curValue.lastIdentifier == static_cast<int32_t>(i) - 1) {
                    //identifier must be right before (, else we have some expression like "( x + y() )"
                    curValue.lastFunctionIdentifier = curValue.lastIdentifier;
                    curValue.param = 0;
                }
                else {  //some expression
                    curValue.lastFunctionIdentifier = -1;
                }
            }
            else if (curToken.token[0] == _param && curValue.lastFunctionIdentifier > -1)  // e.g. ',' — next parameter
            {
                ++curValue.param;
            }
            else if (curToken.token[0] == _stop)  // e.g. ')' — leaving a call scope
            {
                if (scopeLevel)  //scope cannot go below -1
                    scopeLevel--;
                if (valueVec.size() > 0) {  //only pop level if scope was of actual function
                    curValue = valueVec.back();
                    valueVec.pop_back();
                }
                else {
                    //invalidate curValue
                    curValue = FunctionValues();
                }
            }
            else if (curToken.token[0] == _terminal)  // e.g. ';' — statement end
            {
                //invalidate everything
                valueVec.clear();
                curValue = FunctionValues();
            }
        }
    }
    bool res = false;
    if (curValue.lastFunctionIdentifier == -1) {
        //not in direct function. Start popping the stack untill we empty it, or a func IS found
        while(curValue.lastFunctionIdentifier == -1 && valueVec.size() > 0) {
            curValue = valueVec.back();
            valueVec.pop_back();
        }
    }
    if (curValue.lastFunctionIdentifier > -1) {
        Token funcToken = tokenVector.at(curValue.lastFunctionIdentifier);
        funcToken.token[funcToken.length] = 0;  // NUL-terminate in place within lineData
        _currentParam = curValue.param;
        // reload calltip data only if the function name changed
        bool same = false;
        if (_funcName) {
            if(_ignoreCase)
                same = testNameNoCase(_funcName, funcToken.token, lstrlen(_funcName)) == 0;
            else
                same = generic_strncmp(_funcName, funcToken.token, lstrlen(_funcName)) == 0;
        }
        if (!same) {  //check if we need to reload data
            if (_funcName) {
                delete [] _funcName;
            }
            _funcName = new TCHAR[funcToken.length+1];
            lstrcpy(_funcName, funcToken.token);
            res = loadFunction();
        }
        else {
            res = true;
        }
    }
    return res;
}
/**
 * Lexes this fixed token: consumes one character from the stream and yields
 * an element wrapping the rule's token text.
 *
 * @version
 * - JR Lewis pre-2012.03.10
 *   - Initial version.
 */
Element Token_::Lex_(CharacterStream& in) const
{
    in.Consume();
    return Token(token);
}
/// Interpreting a token rule yields an element that wraps the rule object
/// itself (the environment is not consulted).
Element Token_::Interpret_(Environment&)
{
    return Token(this);
}
// SQLite FTS tokenizer "open" callback: splits `input` (UTF-8, `bytes` long)
// into lower-cased letter/number tokens and hands back a cursor over them.
// Byte offsets into the original UTF-8 input are tracked manually (1/2/3-byte
// sequences) so each Token can report its position.
// Ownership: the allocated cursor is returned to SQLite via *cursor and is
// expected to be freed by the matching close callback.
int Database::FTSOpen( sqlite3_tokenizer* pTokenizer, const char* input, int bytes, sqlite3_tokenizer_cursor** cursor) {
    UnicodeTokenizerCursor* new_cursor = new UnicodeTokenizerCursor;
    new_cursor->pTokenizer = pTokenizer;
    new_cursor->position = 0;
    QString str = QString::fromUtf8(input, bytes).toLower();
    QChar* data = str.data();
    // Decompose and strip punctuation.
    QList<Token> tokens;
    QString token;
    int start_offset = 0;
    int offset = 0;
    for (int i = 0; i < str.length(); ++i) {
        QChar c = data[i];
        ushort unicode = c.unicode();
        // advance the UTF-8 byte offset by this code point's encoded length
        if (unicode <= 0x007f) {
            offset += 1;
        } else if (unicode >= 0x0080 && unicode <= 0x07ff) {
            offset += 2;
        } else if (unicode >= 0x0800) {
            offset += 3;
        }
        // Unicode astral planes unsupported in Qt?
        /*else if (unicode >= 0x010000 && unicode <= 0x10ffff) { offset += 4; }*/
        if (!data[i].isLetterOrNumber()) {
            // Token finished.
            if (token.length() != 0) {
                tokens << Token(token, start_offset, offset - 1);
                start_offset = offset;
                token.clear();
            } else {
                ++start_offset;
            }
        } else {
            // keep only the base character of decomposable code points
            // (e.g. strips accents)
            if (data[i].decompositionTag() != QChar::NoDecomposition) {
                token.push_back(data[i].decomposition()[0]);
            } else {
                token.push_back(data[i]);
            }
        }
        // flush a trailing token at end of input
        if (i == str.length() - 1) {
            if (token.length() != 0) {
                tokens << Token(token, start_offset, offset);
                token.clear();
            }
        }
    }
    new_cursor->tokens = tokens;
    *cursor = reinterpret_cast<sqlite3_tokenizer_cursor*>(new_cursor);
    return SQLITE_OK;
}
// Invalid radix prefixes ("0j", bare leading zero, zero-padded hex digits)
// must raise; a lone "0" is a valid integer.
TEST_F(LexerTest, Radix) {
    EXPECT_THROW(getTokens("0j123")[0], Error);
    EXPECT_THROW(getTokens("0123")[0], Error);
    EXPECT_THROW(getTokens("0x0123")[0], Error);
    EXPECT_EQ(getTokens("0")[0], Token(TT::INTEGER, "0", defaultTrace));
}
// Records `text` as a token of the given type, then empties the caller's
// accumulator so it can collect the next token.
void XMLFile::AddToken(std::string& text, TokenType type) {
    m_tokens.push_back(Token(text, type));
    text.clear();
}
// Internal helper function for the compilation
// This is where the actual compilation is done.
// It is done recursivly for each parenthesis.
// Converts tokens[start..] (up to a closing paren) into RPN appended to `rpn`,
// using an operator stack for precedence. Returns the number of tokens
// consumed so the caller can skip past the sub-expression.
int Expression::_compile(int start, Vector<Token>& tokens, Vector<Token>& rpn) {
    int i;
    Stack<eTokenType> opStack;
    eTokenType lastTok = NONE;
    for(i = start; i < tokens.size(); i++) {
        if(tokens[i].type == RPAREN)
            break;
        switch(tokens[i].type) {
        case PLUS:
        case MINUS:
            // do some simple optimizations of the
            // expression (instead of having unary
            // operations for these kind of series)
            if(lastTok==MINUS) {
                // "- -" collapses to "+"
                if(tokens[i].type==MINUS) {
                    opStack.pop();
                    opStack.push(PLUS);
                    break;
                }
            } else if(lastTok==PLUS) {
                // "+ -" collapses to "-"
                if(tokens[i].type==MINUS) {
                    opStack.pop();
                    opStack.push(MINUS);
                    break;
                }
            } else {
                // if next token is a number and this
                // token is a minus (unary) then just negate the number
                // (emitted as "0 x -")
                if( i+1<tokens.size() && IS_EVALUATABLE_LEFT(tokens[i+1].type) && !(IS_EVALUATABLE_RIGHT(lastTok)) && tokens[i].type == MINUS) {
                    opStack.push(MINUS);
                    rpn.add(Token(NUMBER, 0.0));
                    break;
                }
            }
            // NOTE: intentional fallthrough — a plain binary +/- is pushed
            // just like * and /
        case DIV:
        case MUL:
            // just push the operator on the operator stack.
            opStack.push(tokens[i].type);
            break;
        case LPAREN:
        case FUNCTION:
        case VARIABLE:
        case NUMBER:
            // if we found a left parenthesis lets recurse into it and increment our
            // index with the amount of tokens that has been parsed through the
            // recursion. Else just add the number to the rpn expression.
            if(tokens[i].type==LPAREN||tokens[i].type==FUNCTION) {
                if(tokens[i].type==FUNCTION) {
                    // function token is emitted after its argument expression
                    int last_i = i;
                    i+=_compile(i+1, tokens, rpn)+1;
                    rpn.add(tokens[last_i]);
                } else {
                    i+=_compile(i+1, tokens, rpn)+1;
                }
            } else {
                rpn.add(tokens[i]);
            }
            // if our operator stack isn't empty lets peek the last operator
            // and see if it is time to add it to the rpn (according to predecence)
            if(!opStack.empty()) {
                eTokenType lastOp = opStack.peek();
                eTokenType nextOp = NONE;
                if(i+1<tokens.size())
                    nextOp = tokens[i+1].type;
                if((lastOp == PLUS || lastOp == MINUS) && (nextOp == PLUS || nextOp == MINUS || nextOp == RPAREN || nextOp == NONE)) {
                    rpn.add(Token(lastOp));
                    opStack.pop();
                } else if((lastOp == MUL || lastOp == DIV)) {
                    rpn.add(Token(lastOp));
                    opStack.pop();
                    if(nextOp == PLUS || nextOp == MINUS || nextOp == RPAREN || nextOp == NONE) {
                        // nothing has lower predecence than these operators so just empty
                        // the operator stack and add the operators to the rpn expression
                        while(opStack.empty()!=true) {
                            rpn.add(Token(opStack.peek()));
                            opStack.pop();
                        }
                    }
                }
            }
            break;
        case RPAREN:
        case END:
        case NONE:
            ; // do nothing
        }
        lastTok = tokens[i].type;
    }
    // add the remaining operators to the rpn expression
    while(opStack.empty()!=true) {
        rpn.add(Token(opStack.peek()));
        opStack.pop();
    }
    return i-start;
}
// Floats require digits on both sides of the dot; trailing dots, double dots
// and hex floats are errors.
TEST_F(LexerTest, FloatLiterals) {
    EXPECT_EQ(getTokens("12.3")[0], Token(TT::FLOAT, "12.3", defaultTrace));
    EXPECT_THROW(getTokens("12.")[0], Error);
    EXPECT_THROW(getTokens("12.123.")[0], Error);
    EXPECT_THROW(getTokens("0x12.123")[0], Error);
}
// Reserved words are lexed as their dedicated token types, not identifiers.
TEST_F(LexerTest, Keywords) {
    EXPECT_EQ(getTokens("define")[0], Token(TT::DEFINE, "define", defaultTrace));
    EXPECT_EQ(getTokens("function")[0], Token(TT::FUNCTION, "function", defaultTrace));
    EXPECT_EQ(getTokens("protected")[0], Token(TT::PROTECT, "protected", defaultTrace));
}
// this function takes a string containing an arithmetic // expression in infix notation and compiles it into an // internal reverse polish notation representation. // which can then easily be evaluated (and re-evaluated) // This is done by first tokenizing the string // and then parse the tokens, converting // the infix notation to rpn. bool Expression::compile(const char *string) { const char *src = string; Vector<Token> tokens; // tokenize it and parse and convert numbers. while(*src) { if(IS_WHITESPACE(*src)) { src++; continue; } switch(*src) { case '(': tokens.add(Token(LPAREN)); break; case ')': tokens.add(Token(RPAREN)); break; case '-': tokens.add(Token(MINUS)); break; case '+': tokens.add(Token(PLUS)); break; case '*': tokens.add(Token(MUL)); break; case '/': tokens.add(Token(DIV)); break; default: if(IS_NUMBER(*src)) { char num[32]; num[0] = *src; src++; char *num_dest = num+1; int numDots = 0; while(IS_NUMBER(*src)||*src=='.') { if(*src=='.') { numDots++; if(numDots>1) { // error printf("wrong amount of dots in constant number."); return false; } } *num_dest = *src; num_dest++; src++; } *num_dest = 0; float i = atof(num); tokens.add(Token(NUMBER, i)); continue; } else if(IS_LETTER(*src)) { char litteral[255]; litteral[0] = *src; src++; char *litteral_dest = litteral+1; while(IS_LETTER(*src)||IS_NUMBER(*src)) { *litteral_dest = *src; litteral_dest++; src++; } *litteral_dest = 0; while(IS_WHITESPACE(*src)) src++; if(*src=='(') { tokens.add(Token(FUNCTION, scope.getFunction(litteral))); src++; } else { tokens.add(Token(VARIABLE, scope.getVariable(litteral))); } continue; } else return false; } src++; } // compile! this is done recursivly rpn.clear(); _compile(0, tokens, rpn); rpn.add(Token(END)); return true; }
// Structural punctuation lexes to its dedicated token types.
TEST_F(LexerTest, Constructs) {
    EXPECT_EQ(getTokens(";")[0], Token(TT::SEMI, ";", defaultTrace));
    EXPECT_EQ(getTokens("]")[0], Token(TT::SQPAREN_RIGHT, "]", defaultTrace));
}
// "=>" must lex as a single FAT_ARROW token, not '=' followed by '>'.
TEST_F(LexerTest, FatArrow) {
    ASSERT_EQ(getTokens("=>")[0], Token(TT::FAT_ARROW, "=>", defaultTrace));
}
// "true"/"false" lex as BOOLEAN tokens rather than identifiers.
TEST_F(LexerTest, BooleanLiterals) {
    EXPECT_EQ(getTokens("true")[0], Token(TT::BOOLEAN, "true", defaultTrace));
    EXPECT_EQ(getTokens("false")[0], Token(TT::BOOLEAN, "false", defaultTrace));
}
// A quoted string's token text excludes the quotes; an unterminated string
// is an error.
TEST_F(LexerTest, StringLiterals) {
    EXPECT_EQ(getTokens("\"qwerty123\"")[0], Token(TT::STRING, "qwerty123", defaultTrace));
    EXPECT_THROW(getTokens("\"qwerty123")[0], Error);
}
namespace consts {
// Canonical shared Token instances for the language's common words and
// symbols, so parsing code can compare against these instead of constructing
// temporaries.
// NOTE(review): `in` is classified as a symbol although its text is
// alphabetic — confirm this is intentional.
const Token none(token::type::word, "none");
const Token bool_true(token::type::word, "true");
const Token bool_false(token::type::word, "false");
const Token parenthesis_open(token::type::symbol, "(");
const Token parenthesis_close(token::type::symbol, ")");
const Token argument_separator(token::type::symbol, ",");
const Token line_end(token::type::symbol, ";");
const Token access(token::type::symbol, ".");
const Token in(token::type::symbol, "in");
const Token block_open(token::type::symbol, "{");
const Token block_close(token::type::symbol, "}");
const Token function_def(token::type::word, "fun");
const Token function_ret(token::type::word, "return");
}
// Splits `expression` into operator and data tokens.
// Text inside [...] (memory operands) is treated as opaque and accumulated
// verbatim into the current data token; outside brackets, single-character
// operators delimit data tokens and spaces are ignored.
void ExpressionParser::tokenize(const String & expression) {
    bool stateMemory = false;   // true while inside a [...] memory operand
    size_t len = expression.length();
    for(size_t i = 0; i < len; i++) {
        char ch = expression[i];
        switch(ch) {
        case '[':
        {
            stateMemory = true;
            _curToken += ch;
        }
        break;
        case ']':
        {
            stateMemory = false;
            _curToken += ch;
        }
        break;
        default:
        {
            if(stateMemory)
                _curToken += ch;
            else {
                switch(ch) {
                case '(': addOperatorToken(ch, Token::Type::OpenBracket); break;
                case ')': addOperatorToken(ch, Token::Type::CloseBracket); break;
                case '~': addOperatorToken(ch, Token::Type::OperatorNot); break;
                case '*': addOperatorToken(ch, Token::Type::OperatorMul); break;
                case '`': addOperatorToken(ch, Token::Type::OperatorHiMul); break;
                case '/': addOperatorToken(ch, Token::Type::OperatorDiv); break;
                case '%': addOperatorToken(ch, Token::Type::OperatorMod); break;
                case '+':
                    if(!isUnaryOperator()) //skip all unary add operators
                        addOperatorToken(ch, Token::Type::OperatorAdd);
                    break;
                case '-':
                    // '-' is unary negation when it cannot be a binary subtraction
                    if(isUnaryOperator())
                        addOperatorToken(ch, Token::Type::OperatorUnarySub);
                    else
                        addOperatorToken(ch, Token::Type::OperatorSub);
                    break;
                case '<': addOperatorToken(ch, Token::Type::OperatorShl); break;
                case '>': addOperatorToken(ch, Token::Type::OperatorShr); break;
                case '&': addOperatorToken(ch, Token::Type::OperatorAnd); break;
                case '^': addOperatorToken(ch, Token::Type::OperatorXor); break;
                case '|': addOperatorToken(ch, Token::Type::OperatorOr); break;
                case ' ': //ignore spaces
                    break;
                default:
                    _curToken += ch;
                    break;
                }
            }
        }
        break;
        }
    }
    if(_curToken.length() != 0) //make sure the last token is added
        _tokens.push_back(Token(_curToken, Token::Type::Data));
}
// Hex/octal/binary literals are normalized to their decimal text in the token.
TEST_F(LexerTest, IntegerLiterals) {
    EXPECT_EQ(getTokens("123")[0], Token(TT::INTEGER, "123", defaultTrace));
    EXPECT_EQ(getTokens("0xA")[0], Token(TT::INTEGER, "10", defaultTrace));
    EXPECT_EQ(getTokens("0o10")[0], Token(TT::INTEGER, "8", defaultTrace));
    EXPECT_EQ(getTokens("0b10")[0], Token(TT::INTEGER, "2", defaultTrace));
}
// Multi-line comments are swallowed entirely, but their newlines still
// advance the line counter.
TEST_F(LexerTest, CommentsMultiLine) {
    ASSERT_EQ(getTokens("/*asdad\ndasd\nasd*/"), std::vector<Token> {Token(TT::FILE_END, "", defaultTrace)});
    ASSERT_EQ(lx.getLineCount(), 3);
}
// Pushes a new token of the given type, stamped with the current input
// position, and returns a pointer to the stored token so the caller can
// fill in further fields.
// NOTE(review): the returned pointer refers into m_tokens' storage —
// presumably the container keeps references stable across pushes; verify.
Token *Scanner::PushToken(Token::TYPE type) {
    m_tokens.push(Token(type, INPUT.mark()));
    return &m_tokens.back();
}
// Emits the next token, pointer-based variant: tokens reference [begin, end)
// ranges directly inside the tokenizer's buffer instead of copying text.
// Recognizes // comments (with the "///" TB-comment special case), braces,
// parens, brackets, quoted strings, integers, decimals (including scientific
// notation) and bare words. Returns an Eof token at end of input.
Token MapTokenEmitter::doEmit(Tokenizer& tokenizer) {
    while (!tokenizer.eof()) {
        size_t line = tokenizer.line();
        size_t column = tokenizer.column();
        const char* c = tokenizer.nextChar();
        switch (*c) {
            case '/':
                if (tokenizer.peekChar() == '/') {
                    tokenizer.nextChar();
                    if (tokenizer.peekChar() == '/') {
                        tokenizer.nextChar(); // it's a TB comment
                    } else {
                        // eat everything up to and including the next newline
                        // NOTE(review): no eof() check here — a trailing
                        // comment without a final newline may read past the
                        // end; confirm nextChar()'s behavior at eof.
                        while (*tokenizer.nextChar() != '\n');
                    }
                }
                break;
            case '{': return Token(TokenType::OBrace, c, c + 1, tokenizer.offset(c), line, column);
            case '}': return Token(TokenType::CBrace, c, c + 1, tokenizer.offset(c), line, column);
            case '(': return Token(TokenType::OParenthesis, c, c + 1, tokenizer.offset(c), line, column);
            case ')': return Token(TokenType::CParenthesis, c, c + 1, tokenizer.offset(c), line, column);
            case '[': return Token(TokenType::OBracket, c, c + 1, tokenizer.offset(c), line, column);
            case ']': return Token(TokenType::CBracket, c, c + 1, tokenizer.offset(c), line, column);
            case '"': {
                // quoted string: the tokenizer computes the [begin, end) of
                // the string's contents
                const char* begin = c;
                const char* end;
                tokenizer.quotedString(begin, end);
                return Token(TokenType::String, begin, end, tokenizer.offset(begin), line, column);
            }
            default: {
                // whitespace, integer, decimal or word
                if (isWhitespace(*c))
                    break;
                const char* begin = c;
                // try to read a number; if it isn't followed by a delimiter,
                // fall through to the decimal/word cases
                if (*c == '-' || isDigit(*c)) {
                    while (isDigit(*(c = tokenizer.nextChar())));
                    if (isDelimiter(*c)) {
                        // un-read the delimiter for the next emit
                        if (!tokenizer.eof())
                            tokenizer.pushChar();
                        return Token(TokenType::Integer, begin, c, tokenizer.offset(begin), line, column);
                    }
                }
                // try to read a decimal (may start with '.')
                if (*c == '.') {
                    while (isDigit(*(c = tokenizer.nextChar())));
                    if (isDelimiter(*c)) {
                        if (!tokenizer.eof())
                            tokenizer.pushChar();
                        return Token(TokenType::Decimal, begin, c, tokenizer.offset(begin), line, column);
                    }
                }
                // try to read decimal in scientific notation
                if (*c == 'e') {
                    c = tokenizer.nextChar();
                    if (isDigit(*c) || *c == '+' || *c == '-') {
                        while (isDigit(*(c = tokenizer.nextChar())));
                        if (isDelimiter(*c)) {
                            if (!tokenizer.eof())
                                tokenizer.pushChar();
                            return Token(TokenType::Decimal, begin, c, tokenizer.offset(begin), line, column);
                        }
                    }
                }
                // read a word: everything up to the next delimiter
                while (!tokenizer.eof() && !isDelimiter(*(c = tokenizer.nextChar())));
                if (!tokenizer.eof())
                    tokenizer.pushChar();
                return Token(TokenType::String, begin, c, tokenizer.offset(begin), line, column);
            }
        }
    }
    return Token(TokenType::Eof, NULL, NULL, 0, tokenizer.line(), tokenizer.column());
}
// Grammar rule for a SIP version string: the literal "SIP/" followed by
// digits, a dot, and more digits (e.g. "SIP/2.0").
// The sequence's element factory is disabled so matching does not
// materialize sub-elements.
TokenSIPVersion(void) : TokenAbstract("SIPVersion"), _sequence(Token("SIP/"), TokenDigits(), Token("."), TokenDigits()) {
    _sequence.disable_factory(true);
}