Beispiel #1
0
int Lexer::nextTokenKind()
{
    int token = Parser::Token_INVALID;
    if ( m_curpos >= m_contentSize )
    {
        return 0;
    }
    QChar* it = m_content.data();
    it += m_curpos;
    switch ( state() )
    {
        case VariableValueState:
            it = ignoreWhitespaceAndComment( it );
            m_tokenBegin = m_curpos;
            if( m_curpos < m_contentSize )
            {
                if( it->unicode() == '\n' )
                {
                    popState();
                    createNewline( m_curpos );
                    token = Parser::Token_NEWLINE;
                }else if( it->unicode() == '\\' && isCont(it) )

                {
                    pushState(ContState);
                    token = Parser::Token_CONT;
                }else if( it->unicode() == '"')
                {
                    it++;
                    m_curpos++;
                    QChar* lastit = it;
                    while( ( it->unicode() != '"' || lastit->unicode() == '\\' && it->unicode() == '"' ) && it->unicode() != '\n' && it->unicode() != '#' && !isCont( it ) && m_curpos < m_contentSize )
                    {
                        lastit = it;
                        it++;
                        m_curpos++;
                    }
                    if( it->unicode() != '"' && it->unicode() != '#' )
                    {
                        m_curpos--;
                    }
                    token = Parser::Token_VALUE;
                    if( it->unicode() == '#' )
                    {
                        m_tokenEnd = m_curpos - 1;
                        do
                        {
                            it++;
                            m_curpos++;
                        }while( it->unicode() != '\n' && m_curpos < m_contentSize );
                        if( it->unicode() == '\n')
                        {
                            m_curpos--;
                        }
                        return token;
                    }
                }else if( it->unicode() == '(' )
                {
                    unsigned int bracecount = 0;
                    while( ( it->unicode() != ';' || bracecount > 0 ) && it->unicode() != '\n' && !isCont( it )  && m_curpos < m_contentSize )
                    {
                        if( it->unicode() == '(' )
                        {
                            bracecount++;
                        }else if( it->unicode() == ')' && bracecount > 0 )
                        {
                            bracecount--;
                        }
                        ++it;
                        ++m_curpos;
                    }
                    if( it->unicode() != ';' )
                    {
                        m_curpos--;
                    }
                    token = Parser::Token_VALUE;
                }else
                {
                    while( !it->isSpace() && !isCont(it) && it->unicode() != '#' && m_curpos < m_contentSize )
                    {
                        it++;
                        m_curpos++;
                    }
                    m_curpos--;
                    token = Parser::Token_VALUE;
                }
            }
            break;
        case FunctionArgState:
            m_tokenBegin = m_curpos;
            if( it->unicode() == '\n' )
            {
                createNewline( m_curpos );
                token = Parser::Token_NEWLINE;
            }else if( it->unicode() == '\\' && isCont(it) )
            {
                pushState( ContState );
                token = Parser::Token_CONT;
            }else if( it->unicode() == ',' )
            {
                token = Parser::Token_COMMA;
            }else if( it->unicode() == ')' )
            {
                popState();
                token = Parser::Token_RPAREN;
            }else
            {
                unsigned int parentCount = 0;
                while( parentCount > 0 || ( it->unicode() != ')' && it->unicode() != ',' ) && m_curpos < m_contentSize )
                {
                    if( it->unicode() == ')' )
                    {
                        parentCount--;
                    }else if( it->unicode() == '(' )
                    {
                        parentCount++;
                    }
                    ++it;
                    ++m_curpos;
                }
                m_curpos--;
                token = Parser::Token_VALUE;
            }
            break;
        case ContState:
            it = ignoreWhitespaceAndComment( it );
            m_tokenBegin = m_curpos;
            if( m_curpos < m_contentSize )
            {
                if ( it->unicode() == '\n' )
                {
                    createNewline( m_curpos );
                    token = Parser::Token_NEWLINE;
                    m_tokenEnd = m_curpos;
                    popState();
                    QChar* temp = it;
                    int newpos = m_curpos;
                    do
                    {
                        temp++;
                        newpos++;
                        if(temp->unicode() == '#' )
                        {
                            while( temp->unicode() != '\n' && newpos < m_contentSize )
                            {
                                temp++;
                                newpos++;
                            }
                            createNewline( m_curpos );
                            temp++;
                            m_curpos = newpos;
                            newpos++;
                        }
                    }while( temp->isSpace() && temp->unicode() != '\n' && m_curpos < m_contentSize );
                    m_curpos++;
                    return token;
                }
            }
            break;
        case DefaultState:
            it = ignoreWhitespaceAndComment( it );
            m_tokenBegin = m_curpos;
            if( m_curpos < m_contentSize )
            {
                if ( isBeginIdentifierCharacter( it ) )
                {
                    token = Parser::Token_IDENTIFIER;
                    while ( !it->isSpace() && isIdentifierCharacter( it )  && m_curpos < m_contentSize )
                    {
                        it++;
                        m_curpos++;
                    }
                    if( !isEndIdentifierCharacter( ( it-1 ) ) )
                    {
                        token = Parser::Token_INVALID;
                    }
                    m_curpos--;
                }
                else
                {
                    //Now the stuff that will generate a proper token
                    QChar* c2 = m_curpos < m_contentSize ? it + 1 : 0 ;
                    switch ( it->unicode() )
                    {
                        case '|':
                            token = Parser::Token_OR;
                            break;
                        case '!':
                            token = Parser::Token_EXCLAM;
                            break;
                        case '(':
                            pushState( FunctionArgState );
                            token = Parser::Token_LPAREN;
                            break;
                        case '{':
                            token = Parser::Token_LBRACE;
                            break;
                        case '}':
                            token = Parser::Token_RBRACE;
                            break;
                        case ':':
                            token = Parser::Token_COLON;
                            break;
                        case '~':
                            if ( c2 && c2->unicode() == '=' )
                            {
                                pushState( VariableValueState );
                                m_curpos++;
                                token = Parser::Token_TILDEEQ;
                            }
                            break;
                        case '*':
                            if ( c2 && c2->unicode() == '=' )
                            {
                                pushState( VariableValueState );
                                m_curpos++;
                                token = Parser::Token_STAREQ;
                            }
                            break;
                        case '-':
                            if ( c2 && c2->unicode() == '=' )
                            {
                                pushState( VariableValueState );
                                m_curpos++;
                                token = Parser::Token_MINUSEQ;
                            }
                            break;
                        case '+':
                            if ( c2 && c2->unicode() == '=' )
                            {
                                pushState( VariableValueState );
                                m_curpos++;
                                token = Parser::Token_PLUSEQ;
                            }
                            break;
                        case '=':
                            pushState( VariableValueState );
                            token = Parser::Token_EQUAL;
                            break;
                        case '\n':
                            createNewline( m_curpos );
                            token = Parser::Token_NEWLINE;
                            break;
                        default:
                            break;
                    }
                }
            }
            break;
        default:
            token = Parser::Token_INVALID;
            break;
    }
    if ( m_curpos >= m_contentSize )
    {
        return 0;
    }
    m_tokenEnd = m_curpos;
    m_curpos++;
    return token;
}
/**
 * Scans the token that starts at m_curpos and returns its Parser token kind.
 *
 * The first character selects the token class: whitespace runs, numeric
 * literals, one- to three-character operators, '%' comments, quoted atoms and
 * strings, variables (leading upper-case letter) and atoms/reserved words
 * (leading lower-case letter). '{' pushes the Tuple state and '}' pops it
 * again when the current state is Tuple. createNewline() is called for every
 * '\n' that is consumed.
 *
 * On return m_tokenBegin and m_tokenEnd hold the indices of the first and
 * last character of the token and m_curpos is one past m_tokenEnd.
 *
 * @return the token kind; Parser::Token_INVALID for an unrecognised
 *         character; 0 (with m_tokenBegin and m_tokenEnd set to -1) when
 *         m_curpos has reached m_contentSize.
 */
int Lexer::nextTokenKind()
{
    int token = Parser::Token_INVALID;
    if (m_curpos >= m_contentSize) {
        m_tokenBegin = -1;
        m_tokenEnd = -1;
        createNewline(m_curpos);
        return 0;
    }
    QChar* it = m_content.data();
    it += m_curpos;
    m_tokenBegin = m_curpos;

    if (it->isSpace())
    {
        // A whole run of whitespace becomes one token.
        token = Parser::Token_WHITESPACE;
        while (m_curpos < m_contentSize && it->isSpace())
        {
            if (it->unicode() == '\n')
            {
                createNewline(m_curpos);
            }
            it++;
            m_curpos++;
        }
        m_curpos--;
    }
    else if (it->isDigit())
    {
        // Every numeric form accepted by the pattern is reported as
        // Token_INTEGER_LITERAL.
        // NOTE(review): the radix alternative expects 1-2 digits after '#',
        // so input such as 16#FF is not matched as one literal - confirm
        // whether the pattern is intended that way.
        QRegExp regex("\\d+\\.\\d+|\\d+\\.\\d+|\\d+\\.\\d+e-?\\d+|\\d+\\.\\d+e-?\\d+|[\\dABCDEF]+#\\d{1,2}|\\d+");

        if ( regex.indexIn(m_content, m_curpos) != -1)
        {
            kDebug() << "Matched: " << regex.cap();

            // Leave m_curpos on the last character of the match.
            m_curpos += regex.matchedLength() - 1;
            token = Parser::Token_INTEGER_LITERAL;
        }
    }
    else if (it->unicode() == '-')
    {
        if ((it + 1)->unicode() == '>')
        {
            m_curpos++;
            token = Parser::Token_LEADS_TO;
        }
        else if ((it + 1)->unicode() == '-')
        {
            m_curpos++;
            token = Parser::Token_LIST_DIFFERENCE;
        }
        else
        {
            token = Parser::Token_MINUS;
        }
    }
    else if (it->unicode() == '+')
    {
        if ((it + 1)->unicode() == '+')
        {
            m_curpos++;
            token = Parser::Token_LIST_ADDITION;
        }
        else
        {
            token = Parser::Token_PLUS;
        }
    }
    else if (it->unicode() == '$' && processCharLiteral(it + 1))
    {
        token = Parser::Token_CHAR_LITERAL;
    }
    else if (it->unicode() == '}')
    {
        token = Parser::Token_RBRACE;
        if (state() == Tuple)
        {
            popState();
        }
    }
    else if (it->unicode() == '_')
    {
        token = Parser::Token_UNDERLINE;
    }
    else if (it->unicode() == '{')
    {
        token = Parser::Token_LBRACE;
        pushState(Tuple);
    }
    else if (it->unicode() == ')')
    {
        token = Parser::Token_RPAREN;
    }
    else if (it->unicode() == '(')
    {
        token = Parser::Token_LPAREN;
    }
    else if (it->unicode() == ']')
    {
        token = Parser::Token_RBRACKET;
    }
    else if (it->unicode() == '[')
    {
        token = Parser::Token_LBRACKET;
    }
    else if (it->unicode() == ',')
    {
        token = Parser::Token_COMMA;
    }
    else if (it->unicode() == '#')
    {
        token = Parser::Token_POUND_SIGN;
    }
    else if (it->unicode() == '!')
    {
        token = Parser::Token_EXCLAMATION;
    }
    else if (it->unicode() == '<')
    {
        if ((it + 1)->unicode() == '-')
        {
            m_curpos += 1;
            token = Parser::Token_LIST_COMPREHENSION_LEADS_TO;
        }
        else if ((it + 1)->unicode() == '<')
        {
            m_curpos += 1;
            token = Parser::Token_BINARY_OPEN;
        }
        else
        {
            token = Parser::Token_IS_SMALLER;
        }
    }
    else if (it->unicode() == '>')
    {
        if ((it + 1)->unicode() == '=')
        {
            m_curpos++;
            token = Parser::Token_IS_GREATER_OR_EQUAL;
        }
        else if ((it + 1)->unicode() == '>')
        {
            m_curpos += 1;
            token = Parser::Token_BINARY_CLOSE;
        }
        else
        {
            token = Parser::Token_IS_GREATER;
        }
    }
    else if (it->unicode() == '?')
    {
        token = Parser::Token_QUESTION;
    }
    else if (it->unicode() == '%')
    {
        // Comment: runs up to and including the terminating newline.
        token = Parser::Token_COMMENT;
        // Bounded scan: a comment on the last line may have no newline.
        while (m_curpos < m_contentSize && it->unicode() != '\n')
        {
            m_curpos++;
            it++;
        }
        if (m_curpos < m_contentSize)
        {
            createNewline(m_curpos);
        }
        else
        {
            // Ran off the end: keep the token inside the content.
            --m_curpos;
        }
    }
    else if (it->unicode() == '/')
    {
        if ((it + 1)->unicode() == '=')
        {
            // Consume the '=' as well, like every other two-character
            // operator, so it is not lexed again as an assignment.
            m_curpos++;
            token = Parser::Token_IS_NOT_EQUAL;
        }
        else
        {
            token = Parser::Token_FORWARD_SLASH;
        }
    }
    else if (it->unicode() == '*')
    {
        token = Parser::Token_MUL;
    }
    else if (it->unicode() == '|')
    {
        if ((it + 1)->unicode() == '|')
        {
            m_curpos++;
            token = Parser::Token_DOUBLE_PIPE;
        }
        else
        {
            token = Parser::Token_PIPE;
        }
    }
    else if (it->unicode() == ':')
    {
        if ((it + 1)->unicode() == ':')
        {
            m_curpos += 1;
            token = Parser::Token_DOUBLE_COLON;
        }
        else
        {
            token = Parser::Token_COLON;
        }
    }
    else if (it->unicode() == '.')
    {
        token = Parser::Token_DOT;
    }
    else if (it->unicode() == ';')
    {
        token = Parser::Token_SEMICOLON;
    }
    else if (it->unicode() == '\'')
    {
        // Quoted atom: everything up to the closing single quote.
        token = Parser::Token_ATOM_LITERAL;
        it++;
        m_curpos++;
        while (m_curpos < m_contentSize
                && (it->unicode() != '\'')) {
            if (it->unicode() == '\n') createNewline(m_curpos);
            it++;
            m_curpos++;
        }
        // if the atom is never terminated, make sure we don't overflow the boundaries
        if ( m_curpos == m_contentSize ) {
            --m_curpos;
        }
    }
    else if (it->unicode() == '"')
    {
        // String: everything up to the closing double quote.
        token = Parser::Token_STRING_LITERAL;
        it++;
        m_curpos++;
        while (m_curpos < m_contentSize
                && (it->unicode() != '\"')) {
            if (it->unicode() == '\n') createNewline(m_curpos);
            it++;
            m_curpos++;
        }
        // if the string is never terminated, make sure we don't overflow the boundaries
        if ( m_curpos == m_contentSize ) {
            --m_curpos;
        }
    }
    else if (it->unicode() == '=')
    {
        if ((it + 1)->unicode() == '=')
        {
            m_curpos++;
            token = Parser::Token_IS_EQUAL;
        }
        else if ((it + 1)->unicode() == '<') {
            m_curpos++;
            token = Parser::Token_IS_SMALLER_OR_EQUAL;
        }
        else if ((it + 1)->unicode() == ':' && (it + 2)->unicode() == '=')
        {
            m_curpos += 2;
            token = Parser::Token_EXACT_EQUATIONAL;
        }
        else if ((it + 1)->unicode() == '/' && (it + 2)->unicode() == '=')
        {
            m_curpos += 2;
            token = Parser::Token_EXACT_NOT_EQUATIONAL;
        }
        else
        {
            token = Parser::Token_ASSIGN;
        }
    }
    else if (it->isLetter() && it->isUpper())
    {
        // Variable: only its extent matters, the text itself is not needed.
        while (m_curpos < m_contentSize && (isValidVariableIdentifier(it))) {
            it++;
            m_curpos++;
        }
        m_curpos--;
        token = Parser::Token_VARIABLE;
    }
    else if (it->isLetter() && it->isLower())
    {
        QString name;
        while (m_curpos < m_contentSize && (isValidVariableIdentifier(it))) {
            name.append(*it);
            it++;
            m_curpos++;
        }

        m_curpos--;

        // Reserved words and directive names; the first matching entry wins.
        static const struct
        {
            const char* word;
            int kind;
        } reservedWords[] = {
            { "after",     Parser::Token_AFTER },
            { "begin",     Parser::Token_BEGIN },
            { "case",      Parser::Token_CASE },
            { "if",        Parser::Token_IF },
            { "catch",     Parser::Token_CATCH },
            { "cond",      Parser::Token_COND },
            { "end",       Parser::Token_END },
            { "fun",       Parser::Token_FUN },
            { "let",       Parser::Token_LET },
            { "of",        Parser::Token_OF },
            { "receive",   Parser::Token_RECEIVE },
            { "when",      Parser::Token_WHEN },
            { "div",       Parser::Token_INT_DIV },
            { "rem",       Parser::Token_INT_REM },
            { "or",        Parser::Token_OR },
            { "xor",       Parser::Token_XOR },
            { "bor",       Parser::Token_BIT_OR },
            { "bxor",      Parser::Token_BIT_XOR },
            { "query",     Parser::Token_QUERY },
            { "bsl",       Parser::Token_SL },
            { "bsr",       Parser::Token_SR },
            { "and",       Parser::Token_AND },
            { "band",      Parser::Token_BIT_AND },
            { "not",       Parser::Token_NOT },
            { "bnot",      Parser::Token_BIT_NOT },
            { "orelse",    Parser::Token_ORELSE },
            { "andalso",   Parser::Token_ANDALSO },
            { "module",    Parser::Token_MODULE_DIRECTIVE },
            { "include",   Parser::Token_INCLUDE_DIRECTIVE },
            { "export",    Parser::Token_EXPORT_DIRECTIVE },
            { "file",      Parser::Token_FILE_DIRECTIVE },
            { "record",    Parser::Token_RECORD_DIRECTIVE },
            { "behaviour", Parser::Token_BEHAVIOUR_DIRECTIVE },
            { "define",    Parser::Token_DEFINE_DIRECTIVE }
        };
        const int wordCount = static_cast<int>(sizeof(reservedWords) / sizeof(reservedWords[0]));

        // Anything that is not a reserved word is a plain atom.
        token = Parser::Token_ATOM_LITERAL;
        for (int i = 0; i < wordCount; ++i)
        {
            if (name == reservedWords[i].word)
            {
                token = reservedWords[i].kind;
                break;
            }
        }
    }

    // m_curpos sits on the last character of the token at this point.
    m_tokenEnd = m_curpos;
    m_curpos++;

    return token;
}