Esempio n. 1
14
// Appends a token of the given category to the token list.  The token's
// text is the slice of filename_ described by `range`.
void Tokenizer::AddToken(TokenCategory category, bool enclosed,
                         const TokenRange& range) {
  const auto content = filename_.substr(range.offset, range.size);
  tokens_.push_back(Token(category, content, enclosed));
}
 /**
  * Reads from the tokenizer and emits the next map-file token.
  *
  * Recognizes single-line comments ("//"), braces, parentheses, brackets,
  * quoted strings, integers, decimals and bare words.  `line` and `column`
  * are the coordinates of the first character read and are stamped onto the
  * returned token; the token's offset and length are derived from the
  * tokenizer's position.  Returns an Eof token when the input is exhausted.
  */
 Token MapTokenEmitter::doEmit(Tokenizer& tokenizer, size_t line, size_t column) {
     const size_t startPosition = tokenizer.position();
     while (!tokenizer.eof()) {
         char c = tokenizer.nextChar();
         switch (c) {
             case '/':
                 if (tokenizer.peekChar() == '/') {
                     // eat everything up to and including the next newline,
                     // but also stop at EOF: the original looped on nextChar()
                     // unconditionally and would spin forever on a trailing
                     // comment that is not newline-terminated
                     while (!tokenizer.eof() && tokenizer.nextChar() != '\n');
                 }
                 break;
             case '{':
                 return Token(TokenType::OBrace, "", startPosition, tokenizer.position() - startPosition, line, column);
             case '}':
                 return Token(TokenType::CBrace, "", startPosition, tokenizer.position() - startPosition, line, column);
             case '(':
                 return Token(TokenType::OParenthesis, "", startPosition, tokenizer.position() - startPosition, line, column);
             case ')':
                 return Token(TokenType::CParenthesis, "", startPosition, tokenizer.position() - startPosition, line, column);
             case '[':
                 return Token(TokenType::OBracket, "", startPosition, tokenizer.position() - startPosition, line, column);
             case ']':
                 return Token(TokenType::CBracket, "", startPosition, tokenizer.position() - startPosition, line, column);
             case '"': // quoted string: token text excludes the quotes
                 m_buffer.str(String());
                 while (!tokenizer.eof() && (c = tokenizer.nextChar()) != '"')
                     m_buffer << c;
                 return Token(TokenType::String, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
             default: // whitespace, integer, decimal or word
                 if (isWhitespace(c))
                     break;
                 
                 // clear the buffer
                 m_buffer.str(String());
                 
                 // try to read a number
                 if (c == '-' || isDigit(c)) {
                     m_buffer << c;
                     while (isDigit((c = tokenizer.nextChar())))
                         m_buffer << c;
                     if (isDelimiter(c)) {
                         if (!tokenizer.eof())
                             tokenizer.pushChar();
                         return Token(TokenType::Integer, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
                     }
                 }
                 
                 // try to read a decimal (may start with '.'); this also
                 // consumes the fractional part of a number started above
                 if (c == '.') {
                     m_buffer << c;
                     while (isDigit((c = tokenizer.nextChar())))
                         m_buffer << c;
                     if (isDelimiter(c)) {
                         if (!tokenizer.eof())
                             tokenizer.pushChar();
                         return Token(TokenType::Decimal, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
                     }
                 }
                 
                 // fall back to reading a word up to the next delimiter
                 m_buffer << c;
                 while (!tokenizer.eof() && !isDelimiter(c = tokenizer.nextChar()))
                     m_buffer << c;
                 if (!tokenizer.eof())
                     tokenizer.pushChar();
                 return Token(TokenType::String, m_buffer.str(), startPosition, tokenizer.position() - startPosition, line, column);
         }
     }
     return Token(TokenType::Eof, "", startPosition, tokenizer.position() - startPosition, line, column);
 }
Esempio n. 3
0
 // Resets the given token to its default-constructed (empty) state.
 static void clear(Token &t) {
   t = Token{};
 }
Esempio n. 4
0
 // Moves the stream to the next token.  Once the underlying range is
 // exhausted, currentToken is set to the default Token (BAD_TOKEN).
 void TokenStream::advance() {
     if (current == end) {
         currentToken = Token(); // BAD_TOKEN
         return;
     }
     current = nextToken(current, end, currentToken);
 }
Esempio n. 5
0
// Constructs a literal NUMBER value backed by a single synthetic token
// (source tagged "generated" since it does not come from user input).
NumberValue::NumberValue(double value) {
  Token generated("", Token::NUMBER, 0, 0, "generated");
  tokens.push_back(generated);
  type = NUMBER;
  setValue(value);
}
Esempio n. 6
0
// get a parsed line.
// if no more lines exist, returns false
//
// Tokenizes one logical line into *o_tokens.  A logical line may span
// several physical lines (a trailing backslash continues onto the next
// physical line).  '#' starts a comment that runs to end of line.
// Recognized tokens: comma, open/close paren, registered prefixes,
// quoted strings / regexps, and bare words or numbers.
// NOTE(review): the goto labels implement "continue outer loop" /
// "continue inner loop" — do not restructure casually.
bool Parser::getLine(std::vector<Token> *o_tokens)
{
	o_tokens->clear();
	m_lineNumber = m_internalLineNumber;

	tstringi line;
	bool isTokenExist = false;
 continue_getLineLoop:
	while (getLine(&line))
	{
		const _TCHAR *t = line.c_str();

 continue_getTokenLoop:
		while (true)
		{
			// skip white space
			while (*t != _T('\0') && _istspace(*t))
				t ++;
			if (*t == _T('\0') || *t == _T('#'))
				goto break_getTokenLoop; // no more tokens exist
			if (*t == _T('\\') && *(t + 1) == _T('\0'))
				goto continue_getLineLoop; // continue to next line
      
			const _TCHAR *tokenStart = t;
      
			// comma or empty token
			// (a comma right after another comma / open paren stands for an
			// empty token, which is pushed explicitly)
			if (*t == _T(','))
			{
				if (!isTokenExist)
					o_tokens->push_back(Token(_T(""), false));
				isTokenExist = false;
				o_tokens->push_back(Token(Token::Type_comma));
				t ++;
				goto continue_getTokenLoop;
			}

			// paren
			if (*t == _T('('))
			{
				o_tokens->push_back(Token(Token::Type_openParen));
				isTokenExist = false;
				t ++;
				goto continue_getTokenLoop;
			}
			if (*t == _T(')'))
			{
				if (!isTokenExist)
					o_tokens->push_back(Token(_T(""), false));
				isTokenExist = true;
				o_tokens->push_back(Token(Token::Type_closeParen));
				t ++;
				goto continue_getTokenLoop;
			}

			isTokenExist = true;
      
			// prefix
			// (registered prefixes are matched case-insensitively and split
			// off as their own tokens)
			if (m_prefixes)
				for (size_t i = 0; i < m_prefixes->size(); i ++)
					if (_tcsnicmp(tokenStart, m_prefixes->at(i).c_str(),
								  m_prefixes->at(i).size()) == 0)
					{
						o_tokens->push_back(Token(m_prefixes->at(i), false));
						t += m_prefixes->at(i).size();
						goto continue_getTokenLoop;
					}

			// quoted or regexp
			// ("..." and '...' are plain quoted strings; /.../ and \m?...?
			// are regexps, where \m lets the author pick the delimiter)
			if (*t == _T('"') || *t == _T('\'') ||
				*t == _T('/') || (*t == _T('\\') && *(t + 1) == _T('m') &&
								  *(t + 2) != _T('\0')))
			{
				bool isRegexp = !(*t == _T('"') || *t == _T('\''));
				_TCHAR q[2] = { *t++, _T('\0') }; // quote character
				if (q[0] == _T('\\'))
				{
					t++;
					q[0] = *t++;
				}
				tokenStart = t;
	
				// scan to the closing quote, honoring backslash escapes and
				// MBCS lead bytes
				while (*t != _T('\0') && *t != q[0])
				{
					if (*t == _T('\\') && *(t + 1))
						t ++;
					if (_istlead(*t) && *(t + 1))
						t ++;
					t ++;
				}
	
				tstring str =
					interpretMetaCharacters(tokenStart, t - tokenStart, q, isRegexp);
#ifdef _MBCS
				if (isRegexp)
					str = guardRegexpFromMbcs(str.c_str());
#endif
				// concatinate continuous string
				if (!isRegexp &&
					0 < o_tokens->size() && o_tokens->back().isString() &&
					o_tokens->back().isQuoted())
					o_tokens->back().add(str);
				else
					o_tokens->push_back(Token(str, true, isRegexp));
				if (*t != _T('\0'))
					t ++;
				goto continue_getTokenLoop;
			}

			// not quoted
			{
				// consume a run of symbol characters (with escape and MBCS
				// lead-byte handling)
				while (isSymbolChar(*t))
				{
					if (*t == _T('\\'))
						if (*(t + 1))
							t ++;
						else
							break;
					if (_istlead(*t) && *(t + 1))
						t ++;
					t ++;
				}
				if (t == tokenStart)
				{
					// nothing matched: the character is not valid anywhere
					ErrorMessage e;
					e << _T("invalid character ");
#ifdef UNICODE
					e << _T("U+");
					e << std::hex; // << std::setw(4) << std::setfill(_T('0'));
					e << (int)(wchar_t)*t;
#else
					e << _T("\\x");
					e << std::hex; // << std::setw(2) << std::setfill(_T('0'));
					e << (int)(u_char)*t;
#endif
					e << std::dec;
					if (_istprint(*t))
						e << _T("(") << *t << _T(")");
					throw e;
				}
	
				// if the run parses as a number (dec/oct/hex via base 0),
				// emit a numeric token; otherwise a plain word token
				_TCHAR *numEnd = NULL;
				long value = _tcstol(tokenStart, &numEnd, 0);
				if (tokenStart == numEnd)
				{
					tstring str = interpretMetaCharacters(tokenStart, t - tokenStart);
					o_tokens->push_back(Token(str, false));
				}
				else
				{
					o_tokens->push_back(
										Token(value, tstringi(tokenStart, numEnd - tokenStart)));
					t = numEnd;
				}
				goto continue_getTokenLoop;
			}
		}
 break_getTokenLoop:
		if (0 < o_tokens->size())
			break;
		// the physical line produced no tokens (blank/comment); keep reading
		m_lineNumber = m_internalLineNumber;
		isTokenExist = false;
	}
  
	return 0 < o_tokens->size();
}
Esempio n. 7
0
// Single-line comments are skipped entirely (only FILE_END remains) and do
// not increase the reported line count beyond the single physical line.
TEST_F(LexerTest, CommentsSingleLine) {
  ASSERT_EQ(getTokens("// test\n"), std::vector<Token> {Token(TT::FILE_END, "", defaultTrace)});
  ASSERT_EQ(getTokens("// asd 123 . ////**//"), std::vector<Token> {Token(TT::FILE_END, "", defaultTrace)});
  ASSERT_EQ(lx.getLineCount(), 1);
}
Esempio n. 8
0
/**
 * Attempts to recognize a numeric literal at the front of [s, e).
 *
 * Grammar: digits [. digits] [eE [+|-] digits], or a fraction starting
 * with '.'.  A plain integer yields T_NUMERIC_EXACT; anything with a
 * decimal point or exponent yields T_NUMERIC_APPROX.
 *
 * On success `tok` is filled in, `s` is advanced past the literal and
 * true is returned.  On failure `s` is untouched and false is returned.
 *
 * Fix: std::isdigit must not be handed a plain (possibly signed) char —
 * negative values other than EOF are undefined behavior — so every call
 * casts through unsigned char first.
 */
bool tokeniseNumeric(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
{
    std::string::const_iterator t = s;

    // Hand constructed state machine recogniser
    enum {
        START,
        REJECT,
        DIGIT,
        DECIMAL_START,
        DECIMAL,
        EXPONENT_SIGN,
        EXPONENT_START,
        EXPONENT,
        ACCEPT_EXACT,
        ACCEPT_INEXACT
    } state = START;

    while (true)
    switch (state) {
    case START:
        if (t==e) {state = REJECT;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = DIGIT;}
        else if (*t=='.') {++t; state = DECIMAL_START;}
        else state = REJECT;
        break;
    case DECIMAL_START:
        // a lone '.' is not a number; at least one digit must follow
        if (t==e) {state = REJECT;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = DECIMAL;}
        else state = REJECT;
        break;
    case EXPONENT_SIGN:
        if (t==e) {state = REJECT;}
        else if (*t=='-' || *t=='+') {++t; state = EXPONENT_START;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = EXPONENT;}
        else state = REJECT;
        break;
    case EXPONENT_START:
        // after 'e+'/'e-' a digit is mandatory
        if (t==e) {state = REJECT;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = EXPONENT;}
        else state = REJECT;
        break;
    case DIGIT:
        if (t==e) {state = ACCEPT_EXACT;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = DIGIT;}
        else if (*t=='.') {++t; state = DECIMAL;}
        else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
        else state = ACCEPT_EXACT;
        break;
    case DECIMAL:
        if (t==e) {state = ACCEPT_INEXACT;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = DECIMAL;}
        else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
        else state = ACCEPT_INEXACT;
        break;
    case EXPONENT:
        if (t==e) {state = ACCEPT_INEXACT;}
        else if (std::isdigit(static_cast<unsigned char>(*t))) {++t; state = EXPONENT;}
        else state = ACCEPT_INEXACT;
        break;
    case ACCEPT_EXACT:
        tok = Token(T_NUMERIC_EXACT, s, t);
        s = t;
        return true;
    case ACCEPT_INEXACT:
        tok = Token(T_NUMERIC_APPROX, s, t);
        s = t;
        return true;
    case REJECT:
        return false;
    };
}
Esempio n. 9
0
// Lexes and returns the next token from the stream.
//
// State machine: BEGIN dispatches on the first significant character;
// INID/ININT/INSTRING accumulate multi-character lexemes; ONESLASH and
// INCOMMENT handle "//" line comments.  Single-character operators are
// returned immediately from BEGIN.  Returns ERR on malformed input
// (e.g. a digit inside an identifier, a letter inside an integer, an
// unterminated string), and DONE at clean end of input.
static Token
realGetToken(istream* br)
{
	enum LexState { BEGIN, INID, INSTRING, ININT, ONESLASH, INCOMMENT } lexstate = BEGIN;
	string lexeme;

	for(;;) {
		int ch = br->get();
		if( br->bad() || br->eof() ) break;

		// track the current line for diagnostics (global counter)
		if( ch == '\n' )
			linenum++;

		switch( lexstate ) {
		case BEGIN:
			if( isspace(ch) )
				continue;

			// every multi-character token starts its lexeme here
			lexeme = ch;

			if( isalpha(ch) ) {
				lexstate = INID;
			}
			else if( ch == '"' ) {
				lexstate = INSTRING;
			}
			else if( isdigit(ch) ) {
				lexstate = ININT;
			}
			else switch( ch ) {
			case '+':
				return Token::PLUSOP;
			case '-':
				return Token::MINUSOP;
			case '*':
				return Token::STAROP;
			case '/':
				// might be a comment ("//") rather than an operator
				lexstate = ONESLASH;
				break;
			case '=':
				return Token::EQOP;
			case '(':
				return Token::LPAREN;
			case ')':
				return Token::RPAREN;
			case ';':
				return Token::SC;
			default:
				return Token::ERR;
			}
			break;

		case INID:
			if( isalpha(ch) ) {
				lexeme += ch;
			}
			else if( isdigit(ch) ) {
				// digits are not allowed in identifiers in this language
				lexeme += ch;
				return Token(Token::ERR, lexeme);
			}
			else {
				// put back the terminator and classify as id or keyword
				br->putback(ch);
				return id_or_kw(lexeme);
			}
			break;

		case INSTRING:
			// the lexeme keeps the surrounding quotes
			lexeme += ch;
			if( ch == '\n' ) {
				// strings may not span lines
				return Token(Token::ERR, lexeme );
			}
			if( ch == '"' ) {
				return Token(Token::SCONST, lexeme );
			}
			break;

		case ININT:
			if( isdigit(ch) ) {
				lexeme += ch;
			}
			else if( isalpha(ch) ) {
				// a letter glued to a number is malformed
				lexeme += ch;
				return Token(Token::ERR, lexeme);
			}
			else {
				br->putback(ch);
				return Token(Token::ICONST, lexeme);
			}
			break;

		case ONESLASH:
			if( ch != '/' ) {
				// a single '/' is not a valid token here
				lexeme += ch;
				return Token(Token::ERR, lexeme );
			}
			lexstate = INCOMMENT;
			break;

		case INCOMMENT:
			// discard everything to end of line
			if( ch == '\n' ) {
				lexstate = BEGIN;
			}
			break;
		}

	}
	// loop exits only on stream failure or end of input
	if( br->bad() ) return Token::ERR;
	if( br->eof() ) return Token::DONE;
	return Token();
}
// Finds the function call surrounding the cursor on the current line.
//
// Tokenizes the line text before the cursor, then simulates nesting with a
// stack of FunctionValues to cope with nested calls like "a(x, b(), c)".
// On success, stores the current parameter index in _currentParam, loads
// (or reuses) the calltip data for the function name in _funcName, and
// returns true.  Returns false when the cursor is not inside a call.
bool FunctionCallTip::getCursorFunction()
{
	auto line = _pEditView->execute(SCI_LINEFROMPOSITION, _curPos);
	int startpos = static_cast<int32_t>(_pEditView->execute(SCI_POSITIONFROMLINE, line));
	int endpos = static_cast<int32_t>(_pEditView->execute(SCI_GETLINEENDPOSITION, line));
	int len = endpos - startpos + 3;	//also take CRLF in account, even if not there
	int offset = _curPos - startpos;	//offset is cursor location, only stuff before cursor has influence
	const int maxLen = 256;

	if ((offset < 2) || (len >= maxLen))
	{
		reset();
		return false;	//cannot be a func, need name and separator
	}
	
	TCHAR lineData[maxLen] = TEXT("");

	_pEditView->getLine(line, lineData, len);

	//line aquired, find the functionname
	//first split line into tokens to parse
	//token is identifier or some expression, whitespace is ignored
	std::vector< Token > tokenVector;
	int tokenLen = 0;
	TCHAR ch;
	for (int i = 0; i < offset; ++i) 	//we dont care about stuff after the offset
    {
		//tokenVector.push_back(pair(lineData+i, len));
		ch = lineData[i];
		if (isBasicWordChar(ch) || isAdditionalWordChar(ch))	//part of identifier
        {
			tokenLen = 0;
			TCHAR * begin = lineData+i;
            while ((isBasicWordChar(ch) || isAdditionalWordChar(ch)) && i < offset)
			{
				++tokenLen;
				++i;
				ch = lineData[i];
			}
			tokenVector.push_back(Token(begin, tokenLen, true));
			i--;	//correct overshooting of while loop
		}
        else
        {
			if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') 	//whitespace
            {
				//do nothing
			}
            else
            {
				// any other single character becomes a one-char token
				tokenLen = 1;
				tokenVector.push_back(Token(lineData+i, tokenLen, false));
			}
		}
	}

	size_t vsize = tokenVector.size();
	//mind nested funcs, like |blblb a (x, b(), c);|
	//therefore, use stack
	std::vector<FunctionValues> valueVec;

	FunctionValues curValue, newValue;
	int scopeLevel = 0;
	for (size_t i = 0; i < vsize; ++i)
	{
		Token & curToken = tokenVector.at(i);
		if (curToken.isIdentifier)
		{
			curValue.lastIdentifier = static_cast<int32_t>(i);
		}
		else
		{
			// _start/_param/_stop/_terminal are the configured call
			// delimiters (typically '(' ',' ')' ';')
			if (curToken.token[0] == _start)
			{
				++scopeLevel;
				newValue = curValue;
				valueVec.push_back(newValue);	//store the current settings, so when this new function doesnt happen to be the 'real' one, we can restore everything
				
				curValue.scopeLevel = scopeLevel;
				if (i > 0 && curValue.lastIdentifier == static_cast<int32_t>(i) - 1)
				{	//identifier must be right before (, else we have some expression like "( x + y() )"
					curValue.lastFunctionIdentifier = curValue.lastIdentifier;
					curValue.param = 0;
				}
				else
				{	//some expression
					curValue.lastFunctionIdentifier = -1;
				}
			}
			else if (curToken.token[0] == _param && curValue.lastFunctionIdentifier > -1)
			{
				++curValue.param;
			}
			else if (curToken.token[0] == _stop)
			{
				if (scopeLevel)	//scope cannot go below -1
					scopeLevel--;
				if (valueVec.size() > 0)
				{	//only pop level if scope was of actual function
					curValue = valueVec.back();
					valueVec.pop_back();
				}
				else
				{
					//invalidate curValue
					curValue = FunctionValues();
				}
			}
			else if (curToken.token[0] == _terminal)
			{
				//invalidate everything
				valueVec.clear();
				curValue = FunctionValues();
			}
		}
	}
	
	bool res = false;

	if (curValue.lastFunctionIdentifier == -1)
	{	//not in direct function. Start popping the stack untill we empty it, or a func IS found
		while(curValue.lastFunctionIdentifier == -1 && valueVec.size() > 0)
		{
			curValue = valueVec.back();
			valueVec.pop_back();
		}
	}
	if (curValue.lastFunctionIdentifier > -1)
	{
		Token funcToken = tokenVector.at(curValue.lastFunctionIdentifier);
		// null-terminate the in-place slice of lineData for string compares
		funcToken.token[funcToken.length] = 0;
		_currentParam = curValue.param;

		// only reload calltip data if the function name changed
		bool same = false;
		if (_funcName)
		{
			if(_ignoreCase)
				same = testNameNoCase(_funcName, funcToken.token, lstrlen(_funcName)) == 0;
			else
				same = generic_strncmp(_funcName, funcToken.token, lstrlen(_funcName)) == 0;
		}
		if (!same)
		{	//check if we need to reload data
			if (_funcName)
			{
				delete [] _funcName;
			}
			_funcName = new TCHAR[funcToken.length+1];
			lstrcpy(_funcName, funcToken.token);
			res = loadFunction();
		}
		else
		{
			res = true;
		}
	}
	return res;
}
Esempio n. 11
0
/**
 * @brief Consumes exactly one character from the stream and yields this
 *        node's token as the lexed Element.
 *
 * @version
 * - JR Lewis       pre-2012.03.10
 *   - Initial version.
 */ 
Element Token_::Lex_(CharacterStream& in) const
{
    in.Consume();
    Element result = Token(this->token);
    return result;
}
Esempio n. 12
0
// Interpreting a token node evaluates to the token itself, wrapped in an
// Element.  The Environment parameter is intentionally unused.
Element Token_::Interpret_(Environment& )
{
    return Token(this);
}
Esempio n. 13
0
// SQLite FTS tokenizer xOpen callback: splits `input` (UTF-8, `bytes`
// long) into lower-cased tokens and stores them on a new cursor.
// The cursor is heap-allocated because SQLite owns and frees it through
// the matching close callback.
int Database::FTSOpen(
    sqlite3_tokenizer* pTokenizer,
    const char* input,
    int bytes,
    sqlite3_tokenizer_cursor** cursor) {
    UnicodeTokenizerCursor* new_cursor = new UnicodeTokenizerCursor;
    new_cursor->pTokenizer = pTokenizer;
    new_cursor->position = 0;

    QString str = QString::fromUtf8(input, bytes).toLower();
    QChar* data = str.data();
    // Decompose and strip punctuation.
    QList<Token> tokens;
    QString token;
    int start_offset = 0;
    int offset = 0;
    for (int i = 0; i < str.length(); ++i) {
        QChar c = data[i];
        ushort unicode = c.unicode();
        // Track byte offsets in the original UTF-8 input by re-deriving the
        // UTF-8 encoded length of each code point from its value.
        if (unicode <= 0x007f) {
            offset += 1;
        } else if (unicode >= 0x0080 && unicode <= 0x07ff) {
            offset += 2;
        } else if (unicode >= 0x0800) {
            offset += 3;
        }
        // Unicode astral planes unsupported in Qt?
        /*else if (unicode >= 0x010000 && unicode <= 0x10ffff) {
          offset += 4;
        }*/

        if (!data[i].isLetterOrNumber()) {
            // Token finished.
            if (token.length() != 0) {
                tokens << Token(token, start_offset, offset - 1);
                start_offset = offset;
                token.clear();
            } else {
                // no token in progress: skip past the separator byte
                ++start_offset;
            }
        } else {
            // Fold accented characters to their base form where possible.
            if (data[i].decompositionTag() != QChar::NoDecomposition) {
                token.push_back(data[i].decomposition()[0]);
            } else {
                token.push_back(data[i]);
            }
        }

        // Flush a token that runs to the very end of the input.
        if (i == str.length() - 1) {
            if (token.length() != 0) {
                tokens << Token(token, start_offset, offset);
                token.clear();
            }
        }
    }

    new_cursor->tokens = tokens;
    *cursor = reinterpret_cast<sqlite3_tokenizer_cursor*>(new_cursor);

    return SQLITE_OK;
}
Esempio n. 14
0
// Invalid radix prefixes ('0j', bare leading zero, uppercase-mixed hex
// digits after 0x0) must throw; a lone "0" is a valid integer.
TEST_F(LexerTest, Radix) {
  EXPECT_THROW(getTokens("0j123")[0], Error);
  EXPECT_THROW(getTokens("0123")[0], Error);
  EXPECT_THROW(getTokens("0x0123")[0], Error);
  EXPECT_EQ(getTokens("0")[0], Token(TT::INTEGER, "0", defaultTrace));
}
Esempio n. 15
0
// Records `text` as a token of the given type, then empties the buffer so
// the caller can start accumulating the next token in place.
void XMLFile::AddToken(std::string& text, TokenType type)
{
	m_tokens.push_back(Token(text, type));
	text.clear();
}
Esempio n. 16
0
// Internal helper function for the compilation
// This is where the actual compilation is done.
// It is done recursivly for each parenthesis.
//
// Converts the infix token list (from `start` up to the matching RPAREN
// or end) into reverse polish notation in `rpn`, using `opStack` for
// pending operators.  Returns the number of tokens consumed so the caller
// can skip past the sub-expression.
int Expression::_compile(int start, Vector<Token>& tokens, Vector<Token>& rpn)
{
	int i;
	Stack<eTokenType> opStack;
	eTokenType lastTok = NONE;
	for(i = start; i < tokens.size(); i++) 
	{
		if(tokens[i].type == RPAREN) break;

		switch(tokens[i].type) 
		{
		case PLUS:
		case MINUS:
			// do some simple optimizations of the
			// expression (instead of having unary 
			// operations for these kind of series)
			if(lastTok==MINUS) 
			{
				if(tokens[i].type==MINUS) 
				{
					// "- -" collapses to "+"
					opStack.pop();
					opStack.push(PLUS);
					break;
				}
			} 
			else if(lastTok==PLUS) 
			{
				if(tokens[i].type==MINUS) 
				{
					// "+ -" collapses to "-"
					opStack.pop();
					opStack.push(MINUS);
					break;
				}						
			} 
			else 
			{
				// if next token is a number and this
				// token is a minus (unary) then just negate the number
				// (emitted as "0 x -" in the rpn stream)
				if(	i+1<tokens.size() && 
					IS_EVALUATABLE_LEFT(tokens[i+1].type) &&
					!(IS_EVALUATABLE_RIGHT(lastTok)) && 
					tokens[i].type == MINUS) 
				{
					opStack.push(MINUS);
					rpn.add(Token(NUMBER, 0.0));
					break;
				}
			}
			// intentional fall-through: an ordinary +/- is pushed just
			// like * and /
		case DIV:
		case MUL:
			// just push the operator on the operator stack.
			opStack.push(tokens[i].type);
			break;

		case LPAREN:
		case FUNCTION:
		case VARIABLE:
		case NUMBER:
			// if we found a left parenthesis lets recurse into it and increment our
			// index with the amount of tokens that has been parsed through the
			// recursion. Else just add the number to the rpn expression.
			if(tokens[i].type==LPAREN||tokens[i].type==FUNCTION) 
			{
				if(tokens[i].type==FUNCTION) 
				{
					// a function's arguments are compiled first, then the
					// function token itself is emitted
					int last_i = i; 
					i+=_compile(i+1, tokens, rpn)+1;
					rpn.add(tokens[last_i]);
				} 
				else 
				{
					i+=_compile(i+1, tokens, rpn)+1;
				}
			} 
			else 
			{
				rpn.add(tokens[i]);
			}

			// if our operator stack isn't empty lets peek the last operator
			// and see if it is time to add it to the rpn (according to predecence)
			if(!opStack.empty()) 
			{
				eTokenType lastOp = opStack.peek();
				eTokenType nextOp = NONE;
				if(i+1<tokens.size())
					nextOp = tokens[i+1].type;
				if((lastOp == PLUS || lastOp == MINUS) &&
					(nextOp == PLUS || nextOp == MINUS || nextOp == RPAREN || nextOp == NONE)) 
				{
					rpn.add(Token(lastOp));
					opStack.pop();
				}
				else if((lastOp == MUL || lastOp == DIV)) 
				{
					rpn.add(Token(lastOp));
					opStack.pop();
					if(nextOp == PLUS || nextOp == MINUS || nextOp == RPAREN || nextOp == NONE) 
					{	
						// nothing has lower predecence than these operators so just empty 
						// the operator stack and add the operators to the rpn expression
						while(opStack.empty()!=true) 
						{
							rpn.add(Token(opStack.peek()));
							opStack.pop();
						}					
					}
				}
			}
			break;
		case RPAREN:
		case END:
		case NONE:
			;	// do nothing
		}

		lastTok = tokens[i].type;
	}

	// add the remaining operators to the rpn expression
	while(opStack.empty()!=true) 
	{
		rpn.add(Token(opStack.peek()));
		opStack.pop();
	}

	return i-start;
}
Esempio n. 17
0
// A float needs digits on both sides of the point; trailing points,
// double points and hex floats are rejected.
TEST_F(LexerTest, FloatLiterals) {
  EXPECT_EQ(getTokens("12.3")[0], Token(TT::FLOAT, "12.3", defaultTrace));
  EXPECT_THROW(getTokens("12.")[0], Error);
  EXPECT_THROW(getTokens("12.123.")[0], Error);
  EXPECT_THROW(getTokens("0x12.123")[0], Error);
}
Esempio n. 18
0
// Reserved words lex to their dedicated token types, not identifiers.
TEST_F(LexerTest, Keywords) {
  EXPECT_EQ(getTokens("define")[0], Token(TT::DEFINE, "define", defaultTrace));
  EXPECT_EQ(getTokens("function")[0], Token(TT::FUNCTION, "function", defaultTrace));
  EXPECT_EQ(getTokens("protected")[0], Token(TT::PROTECT, "protected", defaultTrace));
}
Esempio n. 19
0
// this function takes a string containing an arithmetic 
// expression in infix notation and compiles it into an 
// internal reverse polish notation representation.
// which can then easily be evaluated (and re-evaluated)
// This is done by first tokenizing the string 
// and then parse the tokens, converting 
// the infix notation to rpn.
//
// Returns false on malformed input (unknown character, a constant with
// more than one decimal point, or a number/identifier too long for the
// fixed-size scratch buffers — the original code would overflow those
// buffers on long inputs).
bool Expression::compile(const char *string) 
{
	const char *src = string;
	Vector<Token> tokens;

	// tokenize it and parse and convert numbers.
	while(*src) 
	{
		if(IS_WHITESPACE(*src)) 
		{
			src++;
			continue;
		}
		switch(*src) 
		{
		case '(': tokens.add(Token(LPAREN)); break;
		case ')': tokens.add(Token(RPAREN)); break;
		case '-': tokens.add(Token(MINUS)); break;
		case '+': tokens.add(Token(PLUS)); break;
		case '*': tokens.add(Token(MUL)); break;
		case '/': tokens.add(Token(DIV)); break;
		default:
			if(IS_NUMBER(*src)) 
				{
					char num[32];
					num[0] = *src;
					src++;
					char *num_dest = num+1;
					int numDots = 0;
					while(IS_NUMBER(*src)||*src=='.')
					{
						if(*src=='.')
						{
							numDots++;
							if(numDots>1)
							{
								// error
								printf("wrong amount of dots in constant number.");
								return false;
							}
						}

						// bounds check: leave room for the terminator so a
						// long literal cannot overflow the buffer
						if(num_dest >= num + sizeof(num) - 1)
						{
							printf("constant number too long.");
							return false;
						}
						*num_dest = *src;
						num_dest++;
						src++;
					}
					*num_dest = 0;
					float i = atof(num);
					tokens.add(Token(NUMBER, i));
					continue;
				}
			else if(IS_LETTER(*src)) 
			{
				char litteral[255];
				litteral[0] = *src;
				src++;
				char *litteral_dest = litteral+1;
				while(IS_LETTER(*src)||IS_NUMBER(*src)) 
				{
					// bounds check, as above
					if(litteral_dest >= litteral + sizeof(litteral) - 1)
					{
						printf("identifier too long.");
						return false;
					}
					*litteral_dest = *src;
					litteral_dest++;
					src++;
				}
				*litteral_dest = 0;

				while(IS_WHITESPACE(*src)) src++;

				// a literal followed by '(' is a function call; otherwise
				// it names a variable in the current scope
				if(*src=='(') 
				{
					tokens.add(Token(FUNCTION, scope.getFunction(litteral)));
					src++;
				} 
				else 
				{
					tokens.add(Token(VARIABLE, scope.getVariable(litteral)));
				}

				continue;					
			}
			else return false;
		}
		src++;
	}

	// compile! this is done recursivly 
	rpn.clear();
	_compile(0, tokens, rpn);

	rpn.add(Token(END));
	return true;
}
Esempio n. 20
0
// Structural punctuation lexes to its dedicated token type.
TEST_F(LexerTest, Constructs) {
  EXPECT_EQ(getTokens(";")[0], Token(TT::SEMI, ";", defaultTrace));
  EXPECT_EQ(getTokens("]")[0], Token(TT::SQPAREN_RIGHT, "]", defaultTrace));
}
Esempio n. 21
0
// "=>" is a single FAT_ARROW token, not '=' followed by '>'.
TEST_F(LexerTest, FatArrow) {
  ASSERT_EQ(getTokens("=>")[0], Token(TT::FAT_ARROW, "=>", defaultTrace));
}
Esempio n. 22
0
// "true"/"false" lex as BOOLEAN tokens rather than identifiers.
TEST_F(LexerTest, BooleanLiterals) {
  EXPECT_EQ(getTokens("true")[0], Token(TT::BOOLEAN, "true", defaultTrace));
  EXPECT_EQ(getTokens("false")[0], Token(TT::BOOLEAN, "false", defaultTrace));
}
Esempio n. 23
0
// String token text excludes the quotes; an unterminated string throws.
TEST_F(LexerTest, StringLiterals) {
  EXPECT_EQ(getTokens("\"qwerty123\"")[0], Token(TT::STRING, "qwerty123", defaultTrace));
  EXPECT_THROW(getTokens("\"qwerty123")[0], Error);
}
Esempio n. 24
0
	namespace consts {
		// Canonical, shared token instances; comparing against these avoids
		// re-constructing the same common tokens all over the parser.
		// NOTE(review): "in" is registered as a symbol rather than a word —
		// confirm this is intended.
		const Token none(token::type::word, "none");
		const Token bool_true(token::type::word, "true");
		const Token bool_false(token::type::word, "false");
		const Token parenthesis_open(token::type::symbol, "(");
		const Token parenthesis_close(token::type::symbol, ")");
		const Token argument_separator(token::type::symbol, ",");
		const Token line_end(token::type::symbol, ";");
		const Token access(token::type::symbol, ".");
		const Token in(token::type::symbol, "in");
		const Token block_open(token::type::symbol, "{");
		const Token block_close(token::type::symbol, "}");
		const Token function_def(token::type::word, "fun");
		const Token function_ret(token::type::word, "return");
	}
Esempio n. 25
0
// Splits `expression` into tokens, accumulating into the member buffer
// _curToken and emitting via addOperatorToken / _tokens.
// Text inside [...] (a memory reference) is passed through verbatim so
// operator characters inside brackets are not treated as operators.
void ExpressionParser::tokenize(const String & expression)
{
    bool stateMemory = false;
    size_t len = expression.length();
    for(size_t i = 0; i < len; i++)
    {
        char ch = expression[i];
        switch(ch)
        {
        case '[':
        {
            // enter memory-reference mode; brackets are kept in the token
            stateMemory = true;
            _curToken += ch;
        }
        break;

        case ']':
        {
            stateMemory = false;
            _curToken += ch;
        }
        break;

        default:
        {
            if(stateMemory)
                _curToken += ch;
            else
            {
                switch(ch)
                {
                case '(':
                    addOperatorToken(ch, Token::Type::OpenBracket);
                    break;
                case ')':
                    addOperatorToken(ch, Token::Type::CloseBracket);
                    break;
                case '~':
                    addOperatorToken(ch, Token::Type::OperatorNot);
                    break;
                case '*':
                    addOperatorToken(ch, Token::Type::OperatorMul);
                    break;
                case '`':
                    addOperatorToken(ch, Token::Type::OperatorHiMul);
                    break;
                case '/':
                    addOperatorToken(ch, Token::Type::OperatorDiv);
                    break;
                case '%':
                    addOperatorToken(ch, Token::Type::OperatorMod);
                    break;
                case '+':
                    if(!isUnaryOperator())   //skip all unary add operators
                        addOperatorToken(ch, Token::Type::OperatorAdd);
                    break;
                case '-':
                    // minus is context-sensitive: unary negation vs subtraction
                    if(isUnaryOperator())
                        addOperatorToken(ch, Token::Type::OperatorUnarySub);
                    else
                        addOperatorToken(ch, Token::Type::OperatorSub);
                    break;
                case '<':
                    // NOTE(review): a single '<'/'>' maps to shift — confirm
                    // multi-char "<<"/">>" is normalized before this point
                    addOperatorToken(ch, Token::Type::OperatorShl);
                    break;
                case '>':
                    addOperatorToken(ch, Token::Type::OperatorShr);
                    break;
                case '&':
                    addOperatorToken(ch, Token::Type::OperatorAnd);
                    break;
                case '^':
                    addOperatorToken(ch, Token::Type::OperatorXor);
                    break;
                case '|':
                    addOperatorToken(ch, Token::Type::OperatorOr);
                    break;
                case ' ': //ignore spaces
                    break;
                default:
                    _curToken += ch;
                    break;
                }
            }
        }
        break;
        }
    }
    if(_curToken.length() != 0)  //make sure the last token is added
        _tokens.push_back(Token(_curToken, Token::Type::Data));
}
Esempio n. 26
0
// Hex/octal/binary literals are normalized to their decimal token text.
TEST_F(LexerTest, IntegerLiterals) {
  EXPECT_EQ(getTokens("123")[0], Token(TT::INTEGER, "123", defaultTrace));
  EXPECT_EQ(getTokens("0xA")[0], Token(TT::INTEGER, "10", defaultTrace));
  EXPECT_EQ(getTokens("0o10")[0], Token(TT::INTEGER, "8", defaultTrace));
  EXPECT_EQ(getTokens("0b10")[0], Token(TT::INTEGER, "2", defaultTrace));
}
Esempio n. 27
0
// Multi-line comments are skipped but their newlines still advance the
// lexer's line counter.
TEST_F(LexerTest, CommentsMultiLine) {
  ASSERT_EQ(getTokens("/*asdad\ndasd\nasd*/"), std::vector<Token> {Token(TT::FILE_END, "", defaultTrace)});
  ASSERT_EQ(lx.getLineCount(), 3);
}
Esempio n. 28
-1
	// Appends a token stamped with the current input position and returns a
	// pointer to it so the caller can fill in further fields.
	// NOTE(review): the returned pointer stays valid only while m_tokens
	// guarantees reference stability on push (true for std::queue over
	// std::deque) — confirm the container choice.
	Token *Scanner::PushToken(Token::TYPE type)
	{
		m_tokens.push(Token(type, INPUT.mark()));
		return &m_tokens.back();
	}
Esempio n. 29
-1
        /**
         * Reads from the tokenizer and emits the next map-file token.
         *
         * Handles "//" line comments (and the "///" TrenchBroom comment
         * marker), braces, parentheses, brackets, quoted strings, integers,
         * decimals (including scientific notation) and bare words.  Token
         * text is referenced as a [begin, end) character range into the
         * tokenizer's buffer.  Returns an Eof token when input is exhausted.
         */
        Token MapTokenEmitter::doEmit(Tokenizer& tokenizer) {
            while (!tokenizer.eof()) {
                size_t line = tokenizer.line();
                size_t column = tokenizer.column();
                const char* c = tokenizer.nextChar();
                switch (*c) {
                    case '/':
                        if (tokenizer.peekChar() == '/') {
                            tokenizer.nextChar();
                            if (tokenizer.peekChar() == '/') {
                                tokenizer.nextChar(); // it's a TB comment
                            } else {
                                // eat everything up to and including the next
                                // newline; also stop at EOF so an unterminated
                                // trailing comment cannot loop forever (the
                                // original called nextChar() unconditionally)
                                while (!tokenizer.eof() && *tokenizer.nextChar() != '\n');
                            }
                        }
                        break;
                    case '{':
                        return Token(TokenType::OBrace, c, c + 1, tokenizer.offset(c), line, column);
                    case '}':
                        return Token(TokenType::CBrace, c, c + 1, tokenizer.offset(c), line, column);
                    case '(':
                        return Token(TokenType::OParenthesis, c, c + 1, tokenizer.offset(c), line, column);
                    case ')':
                        return Token(TokenType::CParenthesis, c, c + 1, tokenizer.offset(c), line, column);
                    case '[':
                        return Token(TokenType::OBracket, c, c + 1, tokenizer.offset(c), line, column);
                    case ']':
                        return Token(TokenType::CBracket, c, c + 1, tokenizer.offset(c), line, column);
                    case '"': { // quoted string
                        const char* begin = c;
                        const char* end;
                        tokenizer.quotedString(begin, end);
                        return Token(TokenType::String, begin, end, tokenizer.offset(begin), line, column);
                    }
                    default: { // whitespace, integer, decimal or word
                        if (isWhitespace(*c))
                            break;
                        
                        const char* begin = c;

                        // try to read a number
                        if (*c == '-' || isDigit(*c)) {
                            while (isDigit(*(c = tokenizer.nextChar())));
                            if (isDelimiter(*c)) {
                                if (!tokenizer.eof())
                                    tokenizer.pushChar();
                                return Token(TokenType::Integer, begin, c, tokenizer.offset(begin), line, column);
                            }
                        }
                        
                        // try to read a decimal (may start with '.')
                        if (*c == '.') {
                            while (isDigit(*(c = tokenizer.nextChar())));
                            if (isDelimiter(*c)) {
                                if (!tokenizer.eof())
                                    tokenizer.pushChar();
                                return Token(TokenType::Decimal, begin, c, tokenizer.offset(begin), line, column);
                            }
                        }
                        
                        // try to read decimal in scientific notation
                        if (*c == 'e') {
                            c = tokenizer.nextChar();
                            if (isDigit(*c) || *c == '+' || *c == '-') {
                                while (isDigit(*(c = tokenizer.nextChar())));
                                if (isDelimiter(*c)) {
                                    if (!tokenizer.eof())
                                        tokenizer.pushChar();
                                    return Token(TokenType::Decimal, begin, c, tokenizer.offset(begin), line, column);
                                }
                            }
                        }
                        
                        // fall back to reading a word up to the next delimiter
                        while (!tokenizer.eof() && !isDelimiter(*(c = tokenizer.nextChar())));
                        if (!tokenizer.eof())
                            tokenizer.pushChar();
                        return Token(TokenType::String, begin, c, tokenizer.offset(begin), line, column);
                    }
                }
            }
            return Token(TokenType::Eof, NULL, NULL, 0, tokenizer.line(), tokenizer.column());
        }
Esempio n. 30
-14
 // Matches a SIP version literal of the form "SIP/" <digits> "." <digits>.
 TokenSIPVersion() : TokenAbstract("SIPVersion"),
   _sequence(Token("SIP/"), TokenDigits(), Token("."), TokenDigits())
 {
   // The sequence owns its sub-tokens directly; keep the factory disabled.
   _sequence.disable_factory(true);
 }