예제 #1
0
// Get one parsed (tokenized) logical line.
//
// Raw lines are fetched with getLine(&line) and split into tokens that are
// appended to *o_tokens.  Lines that yield no tokens (blank or '#'-only
// lines) are skipped.  A raw line ending in a lone backslash is joined with
// the following raw line, so one logical line may span several raw lines.
//
// o_tokens : output; cleared on entry, filled with the tokens of the line.
// returns  : true if at least one token was produced, false if no more
//            lines exist.
// throws   : ErrorMessage when a character that cannot start any token is
//            encountered.
bool Parser::getLine(std::vector<Token> *o_tokens)
{
	o_tokens->clear();
	m_lineNumber = m_internalLineNumber;

	tstringi line;
	// true when a value token has been seen since the last ',' or '(';
	// used to decide whether an empty token must be inserted before ',' / ')'
	bool isTokenExist = false;
	// re-entered by goto on a line continuation: o_tokens and isTokenExist
	// are deliberately kept so the next raw line extends the current one
 continue_getLineLoop:
	while (getLine(&line))
	{
		const _TCHAR *t = line.c_str();	// scan cursor within the raw line

 continue_getTokenLoop:
		while (true)
		{
			// skip white space
			while (*t != _T('\0') && _istspace(*t))
				t ++;
			// end of line, or '#': the rest of the line is not tokenized
			if (*t == _T('\0') || *t == _T('#'))
				goto break_getTokenLoop; // no more tokens exist
			// a backslash as the very last character joins the next line
			if (*t == _T('\\') && *(t + 1) == _T('\0'))
				goto continue_getLineLoop; // continue to next line
      
			const _TCHAR *tokenStart = t;
      
			// comma or empty token
			if (*t == _T(','))
			{
				// nothing between the previous separator and this comma:
				// represent the gap as an empty, unquoted string token
				if (!isTokenExist)
					o_tokens->push_back(Token(_T(""), false));
				isTokenExist = false;
				o_tokens->push_back(Token(Token::Type_comma));
				t ++;
				goto continue_getTokenLoop;
			}

			// paren
			if (*t == _T('('))
			{
				o_tokens->push_back(Token(Token::Type_openParen));
				isTokenExist = false;
				t ++;
				goto continue_getTokenLoop;
			}
			if (*t == _T(')'))
			{
				// nothing before the ')' (e.g. directly after '(' or ','):
				// insert an empty, unquoted string token first
				if (!isTokenExist)
					o_tokens->push_back(Token(_T(""), false));
				// the parenthesized group counts as a token for a following ','
				isTokenExist = true;
				o_tokens->push_back(Token(Token::Type_closeParen));
				t ++;
				goto continue_getTokenLoop;
			}

			isTokenExist = true;
      
			// prefix
			// if the text at the cursor starts with one of the registered
			// prefixes (compared with _tcsnicmp), emit the prefix as its own
			// unquoted token and resume scanning right after it
			if (m_prefixes)
				for (size_t i = 0; i < m_prefixes->size(); i ++)
					if (_tcsnicmp(tokenStart, m_prefixes->at(i).c_str(),
								  m_prefixes->at(i).size()) == 0)
					{
						o_tokens->push_back(Token(m_prefixes->at(i), false));
						t += m_prefixes->at(i).size();
						goto continue_getTokenLoop;
					}

			// quoted or regexp
			// "..." and '...' are strings; /.../ and \m<c>...<c> (with an
			// arbitrary delimiter character <c>) are regexps
			if (*t == _T('"') || *t == _T('\'') ||
				*t == _T('/') || (*t == _T('\\') && *(t + 1) == _T('m') &&
								  *(t + 2) != _T('\0')))
			{
				bool isRegexp = !(*t == _T('"') || *t == _T('\''));
				_TCHAR q[2] = { *t++, _T('\0') }; // quote character
				if (q[0] == _T('\\'))
				{
					// \m form: skip the 'm' and take the next character
					// as the delimiter
					t++;
					q[0] = *t++;
				}
				tokenStart = t;
	
				// advance to the closing delimiter (or end of line)
				while (*t != _T('\0') && *t != q[0])
				{
					// a backslash escapes the next character, so an escaped
					// delimiter does not terminate the token
					if (*t == _T('\\') && *(t + 1))
						t ++;
					// step over the second unit of a lead-byte pair so it is
					// never mistaken for a delimiter or a backslash
					if (_istlead(*t) && *(t + 1))
						t ++;
					t ++;
				}
	
				tstring str =
					interpretMetaCharacters(tokenStart, t - tokenStart, q, isRegexp);
#ifdef _MBCS
				if (isRegexp)
					str = guardRegexpFromMbcs(str.c_str());
#endif
				// concatenate continuous string
				// (adjacent quoted strings are merged into the previous
				// token; regexps are never merged)
				if (!isRegexp &&
					0 < o_tokens->size() && o_tokens->back().isString() &&
					o_tokens->back().isQuoted())
					o_tokens->back().add(str);
				else
					o_tokens->push_back(Token(str, true, isRegexp));
				// skip the closing delimiter; if the line ended first the
				// token is simply taken up to end of line
				if (*t != _T('\0'))
					t ++;
				goto continue_getTokenLoop;
			}

			// not quoted
			{
				// consume the run of symbol characters
				while (isSymbolChar(*t))
				{
					// NOTE: the else below binds to the inner if -- a
					// backslash followed by a character escapes it, while a
					// backslash at end of line terminates the token
					if (*t == _T('\\'))
						if (*(t + 1))
							t ++;
						else
							break;
					if (_istlead(*t) && *(t + 1))
						t ++;
					t ++;
				}
				// nothing consumed: the character cannot start any token,
				// report it with its code (hex) and, if printable, itself
				if (t == tokenStart)
				{
					ErrorMessage e;
					e << _T("invalid character ");
#ifdef UNICODE
					e << _T("U+");
					e << std::hex; // << std::setw(4) << std::setfill(_T('0'));
					e << (int)(wchar_t)*t;
#else
					e << _T("\\x");
					e << std::hex; // << std::setw(2) << std::setfill(_T('0'));
					e << (int)(u_char)*t;
#endif
					e << std::dec;
					if (_istprint(*t))
						e << _T("(") << *t << _T(")");
					throw e;
				}
	
				// try to read the token as an integer; base 0 lets _tcstol
				// pick the radix from the literal's prefix
				_TCHAR *numEnd = NULL;
				long value = _tcstol(tokenStart, &numEnd, 0);
				if (tokenStart == numEnd)
				{
					// no digits converted: plain unquoted string token
					tstring str = interpretMetaCharacters(tokenStart, t - tokenStart);
					o_tokens->push_back(Token(str, false));
				}
				else
				{
					// number token holding the value and its original text;
					// scanning resumes at numEnd, so only the numeric part
					// of the symbol run is consumed here
					o_tokens->push_back(
										Token(value, tstringi(tokenStart, numEnd - tokenStart)));
					t = numEnd;
				}
				goto continue_getTokenLoop;
			}
		}
 break_getTokenLoop:
		// a line that produced tokens is complete; otherwise (blank or
		// comment-only line) move on to the next raw line
		if (0 < o_tokens->size())
			break;
		m_lineNumber = m_internalLineNumber;
		isTokenExist = false;
	}
  
	return 0 < o_tokens->size();
}
예제 #2
0
파일: lex.cpp 프로젝트: pgregory/tumbleweed
/*
 * Scan the next token from the input.
 *
 * Blanks and "..." comments are skipped, then the token is classified by
 * its first character.  On return:
 *   m_currentToken  - the token kind (also the return value)
 *   m_tokenString   - the token text (empty at end of input)
 *   m_tokenInteger  - value of an intconst, or the character of a charconst
 *   m_tokenFloat    - value of a floatconst
 *
 * Characters read past the end of the token are handed back with
 * pushBack() so the next call sees them again.
 *
 * The token text never contains an embedded '\0': std::string::c_str() is
 * already NUL-terminated, and a NUL inside the string would hide anything
 * appended after it (such as an exponent) from atof().
 */
tokentype Lexer::nextToken()
{
    /* skip over blanks and comments */
    while (nextChar() && (isspace(m_cc) || (m_cc == '"')))
    {
        if (m_cc == '"')
        {
            /* read comment */
            while (nextChar() && (m_cc != '"')) ;
            if (! m_cc) break;    /* break if we run into eof */
        }
    }

    /* the token text starts with the first significant character; at end
       of input (m_cc == 0) it stays empty */
    std::string strToken;
    if (m_cc)
        strToken.push_back(m_cc);

    if (! m_cc)           /* end of input */
        m_currentToken = inputend;
    else if (isalpha(m_cc))
    {     /* identifier */
        while (nextChar() && isalnum(m_cc))
            strToken.push_back(m_cc);
        if (m_cc == ':')
        {
            /* a trailing colon is part of the token */
            strToken.push_back(m_cc);
            m_currentToken = namecolon;
        }
        else
        {
            pushBack(m_cc);
            m_currentToken = nameconst;
        }
    }
    else if (isdigit(m_cc))
    {     /* number */
        /* NOTE(review): a very long digit run overflows `long` here
           (undefined behaviour); no range check is performed */
        long longresult = m_cc - '0';
        while (nextChar() && isdigit(m_cc))
        {
            strToken.push_back(m_cc);
            longresult = (longresult * 10) + (m_cc - '0');
        }
        if (longCanBeInt(longresult))
        {
            m_tokenInteger = longresult;
            m_currentToken = intconst;
        }
        else
        {
            /* too large for an int: keep the value as a float */
            m_currentToken = floatconst;
            m_tokenFloat = (double) longresult;
        }
        if (m_cc == '.')
        {    /* possible float */
            if (nextChar() && isdigit(m_cc))
            {
                strToken.push_back('.');
                do
                    strToken.push_back(m_cc);
                while (nextChar() && isdigit(m_cc));
                if (m_cc) pushBack(m_cc);
                m_currentToken = floatconst;
                /* no explicit terminator: c_str() is NUL-terminated, and the
                   exponent handling below may still append to strToken */
                m_tokenFloat = atof(strToken.c_str());
            }
            else
            {
                /* nope, just an ordinary period */
                if (m_cc) pushBack(m_cc);
                pushBack('.');
            }
        }
        else
            pushBack(m_cc);

        if (nextChar() && m_cc == 'e')
        {  /* possible float */
            bool sign = false;    /* true when the exponent is negative */
            if (nextChar() && m_cc == '-')
            {
                sign = true;
                nextChar();
            }
            if (m_cc && isdigit(m_cc))
            { /* yep, its a float */
                strToken.push_back('e');
                if (sign)
                    strToken.push_back('-');
                while (m_cc && isdigit(m_cc))
                {
                    strToken.push_back(m_cc);
                    nextChar();
                }
                if (m_cc)
                    pushBack(m_cc);
                m_currentToken = floatconst;
                /* re-parse the whole literal, mantissa and exponent */
                m_tokenFloat = atof(strToken.c_str());
            }
            else
            {  /* nope, wrong again */
                /* hand back everything read, in reverse order */
                if (m_cc) pushBack(m_cc);
                if (sign) pushBack('-');
                pushBack('e');
            }
        }
        else
            if (m_cc) pushBack(m_cc);
    }
    else if (m_cc == '$')
    {       /* character constant */
        m_tokenInteger = (int) nextChar();
        m_currentToken = charconst;
    }
    else if (m_cc == '#')
    {       /* symbol */
        strToken.resize(strToken.size()-1); // erase pound sign
        if (nextChar() == '(')
            m_currentToken = arraybegin;
        else
        {
            /* re-read the character after '#' through the common loop */
            pushBack(m_cc);
            while (nextChar() && isSymbolChar(m_cc))
                strToken.push_back(m_cc);
            pushBack(m_cc);
            m_currentToken = symconst;
        }
    }
    else if (m_cc == '\'')
    {      /* string constant */
        strToken.resize(strToken.size()-1);   /* erase opening quote */
        bool quoteDoubled;
        do
        {
            while (nextChar() && (m_cc != '\''))
                strToken.push_back(m_cc);
            /* check for nested quote marks: '' inside a string stands for
               one quote character and the string continues */
            quoteDoubled = m_cc && nextChar() && (m_cc == '\'');
            if (quoteDoubled)
                strToken.push_back(m_cc);
        } while (quoteDoubled);
        pushBack(m_cc);
        m_currentToken = strconst;
    }
    else if (isClosing(m_cc))     /* closing expressions */
        m_currentToken = closing;
    else if (singleBinary(m_cc))
    {    /* single binary expressions */
        m_currentToken = binary;
    }
    else
    {              /* anything else is binary */
        /* a binary operator may consist of two characters */
        if (nextChar() && binarySecond(m_cc))
            strToken.push_back(m_cc);
        else
            pushBack(m_cc);
        m_currentToken = binary;
    }

    m_tokenString = strToken;
    return(m_currentToken);
}