示例#1
0
bool Tokenizer::SkipBlock(const wxChar& ch)
{
    // skip blocks () [] {} <>
    wxChar match;
    switch (ch)
    {
        case '(': match = ')'; break;
        case '[': match = ']'; break;
        case '{': match = '}'; break;
        case '<': match = '>'; break;
        default : return false;
    }

    MoveToNextChar();
    int nestLevel = 1; // counter for nested blocks (xxx())
    while (NotEOF())
    {
        while (SkipWhiteSpace() || SkipString() || SkipComment())
            ;

        if (CurrentChar() == ch)
            ++nestLevel;
        else if (CurrentChar() == match)
            --nestLevel;

        MoveToNextChar();

        if (nestLevel == 0)
            break;
    }

    return NotEOF();
}
示例#2
0
bool Tokenizer::SkipToEOL(bool nestBraces, bool skippingComment)
{
    // skip everything until we find EOL
    while (1)
    {
        while (NotEOF() && CurrentChar() != '\n')
        {
            if (CurrentChar() == '/' && NextChar() == '*')
            {
                SkipComment(false); // don't skip whitespace after the comment
                if (skippingComment && CurrentChar() == '\n')
                {
                    continue; // early exit from the loop
                }
            }
            if (nestBraces && CurrentChar() == _T('{'))
                ++m_NestLevel;
            else if (nestBraces && CurrentChar() == _T('}'))
                --m_NestLevel;
            MoveToNextChar();
        }
        wxChar last = PreviousChar();
        // if DOS line endings, we 've hit \r and we skip to \n...
        if (last == '\r')
            last = m_Buffer.GetChar(m_TokenIndex - 2);
        if (IsEOF() || last != '\\')
            break;
        else
            MoveToNextChar();
    }
    if (IsEOF())
        return false;
    return true;
}
示例#3
0
void Tokenizer::SkipToEndConditionPreprocessor()
{
    do
    {
        wxChar ch = CurrentChar();
        if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' '))
        {
            while (SkipWhiteSpace() || SkipString() || SkipComment())
                ;
            ch = CurrentChar();
        }

        if (ch == _T('#'))
        {
            MoveToNextChar();
            while (SkipWhiteSpace() || SkipComment())
                ;

            const wxChar current = CurrentChar();
            const wxChar next = NextChar();

            // #if
            if (current == _T('i') && next == _T('f'))
                SkipToEndConditionPreprocessor();

            // #endif
            else if (current == _T('e') && next == _T('n'))
            {
                SkipToEOL(false);
                break;
            }
        }
    }
    while (MoveToNextChar());
}
示例#4
0
bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting)
{
    // skip everything until we find any one of chars
    while (1)
    {
        while (NotEOF() && !CharInString(CurrentChar(), chars))
        {
            if (CurrentChar() == '"' || CurrentChar() == '\'')
            {
                // this is the case that match is inside a string!
                wxChar ch = CurrentChar();
                MoveToNextChar();
                SkipToChar(ch);
            }
            MoveToNextChar();

            // make sure we skip comments
            if (CurrentChar() == '/')
                SkipComment(); // this will decide if it is a comment

            // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar)
            // must skip <foo> and immediately after (bar))
            // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird
            // parsing results
            bool done = false;
            while (supportNesting && !done)
            {
                switch (CurrentChar())
                {
                    case '{': SkipBlock('{'); break;
                    case '(': SkipBlock('('); break;
                    case '[': SkipBlock('['); break;
                    case '<': // don't skip if << operator
                        if (NextChar() == '<')
                            MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<');
                        else
                            SkipBlock('<');
                        break;
                    default: done = true; break;
                }
            }
        }
        if (PreviousChar() != '\\')
            break;
        else
        {
            // check for "\\"
            if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\')
                break;
        }
        MoveToNextChar();
    }
    if (IsEOF())
        return false;
    return true;
}
示例#5
0
void TextCutter::GetChunk(wxString& chunk, bool& isWord)
{
    isWord = false;
    chunk = wxEmptyString;
    if (IsEOF())
        return;

    unsigned int start = m_CurIdx;
    if (isalpha(CurrentChar()) || CurrentChar() == '_')
    {
        while (!IsEOF() &&
               (isalnum(CurrentChar()) || CurrentChar() == '_'))
            MoveToNextChar();
        chunk = m_Text.Mid(start, m_CurIdx - start);
        isWord = true;
    }
    else
    {
        SkipWhiteSpace();
        SkipUnwanted();
        if (start != m_CurIdx)
        {
            chunk = m_Text.Mid(start, m_CurIdx - start);
            return;
        }

        if (isdigit(CurrentChar()))
        {
            // numbers
            while (!IsEOF() && CharInString(CurrentChar(), "0123456789.abcdefABCDEFXxLl"))
                MoveToNextChar();

        }
        else if (CurrentChar() == '"' ||
                CurrentChar() == '\'')
        {
            // string, char, etc.
            wxChar match = CurrentChar();
            MoveToNextChar();  // skip starting ' or "
            SkipToChar(match);
            MoveToNextChar(); // skip ending ' or "
        }
        else
        {
            MoveToNextChar();
        }
        chunk = m_Text.Mid(start, m_CurIdx - start);
    }
    return;
}
示例#6
0
//  For example: X"ABCDEFG\"HIJKLMN"Y
//  We are now at A, and would skip to Y
//  The double quote before H is a "C-escaped-character", We shouldn't quite from that
bool Tokenizer::SkipToStringEnd(const wxChar& ch)
{
    while (true)
    {
        while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does
            ;

        if (IsEOF())
            return false;

        if (IsEscapedChar()) break;

        MoveToNextChar();
    }
    return true;
}
示例#7
0
// if we really move forward, return true, which means we have the new m_TokenIndex
// if we stay here, return false
bool Tokenizer::SkipComment()
{
    if (IsEOF())
        return false;

    bool cstyle;            // C or C++ style comments

    //check the comment prompt
    if (CurrentChar() == '/')
    {
        if      (NextChar() == '*')
            cstyle = true;
        else if (NextChar() == '/')
            cstyle = false;
        else
            return false; // Not a comment, return false;
    }
    else
        return false;     // Not a comment, return false;

    TRACE(_T("SkipComment() : Start from line = %d"), m_LineNumber);
    MoveToNextChar(2);    // Skip the comment prompt

    // Here, we are in the comment body
    while (true)
    {
        if (cstyle)      // C style comment
        {
            SkipToChar('/');
            if (PreviousChar() == '*') // end of a C style comment
            {
                MoveToNextChar();
                break;
            }
            if (!MoveToNextChar())
                break;
        }
        else             // C++ style comment
        {
            TRACE(_T("SkipComment() : Need to call SkipToEOL() here at line = %d"), m_LineNumber);
            SkipToInlineCommentEnd();
            break;
        }
    }

    return true;
}
示例#8
0
// expect we are not in a C-string
bool Tokenizer::SkipToChar(const wxChar& ch)
{
    // skip everything until we find ch
    while (CurrentChar() != ch && MoveToNextChar())  // don't check EOF when MoveToNextChar already does
        ;

    return NotEOF();
}
示例#9
0
// return true if we really skip a string, that means m_TokenIndex has changed.
bool Tokenizer::SkipString()
{
    if (IsEOF())
        return false;

    const wxChar ch = CurrentChar();
    if (ch == _T('"') || ch == _T('\''))
    {
        // this is the case that match is inside a string!
        MoveToNextChar();
        SkipToStringEnd(ch);
        MoveToNextChar();
        return true;
    }

    return false;
}
示例#10
0
bool TextCutter::SkipWhiteSpace()
{
    if (IsEOF())
        return false;
    while (!IsEOF() && isspace(CurrentChar()))
        MoveToNextChar();
    return true;
}
示例#11
0
// expect we are not in a C-string.
bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting, bool skipPreprocessor, bool skipAngleBrace)
{
    while (NotEOF() && !CharInString(CurrentChar(), chars))
    {
        MoveToNextChar();

        while (SkipString() || SkipComment())
            ;

        // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar)
        // must skip <foo> and immediately after (bar))
        // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird
        // parsing results
        bool done = false;
        while (supportNesting && !done)
        {
            switch (CurrentChar())
            {
                case '#':
                    if (skipPreprocessor)
                        SkipToEOL(true);
                    else
                        done = true;
                    break;
                case '{': SkipBlock('{'); break;
                case '(': SkipBlock('('); break;
                case '[': SkipBlock('['); break;
                case '<': // don't skip if << operator
                    if (skipAngleBrace)
                    {
                        if (NextChar() == '<')
                            MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<');
                        else
                            SkipBlock('<');
                        break;
                    }

                default: done = true; break;
            }
        }

    }

    return NotEOF();
}
示例#12
0
bool Tokenizer::SkipBlock(const wxChar& ch)
{
    // skip blocks () [] {} <>
    wxChar match;
    switch (ch)
    {
        case '(': match = ')'; break;
        case '[': match = ']'; break;
        case '{': match = '}'; break;
        case '<': match = '>'; break;
        default : return false;
    }

    MoveToNextChar();
    int count = 1; // counter for nested blocks (xxx())
    while (NotEOF())
    {
        bool noMove = false;
        if (CurrentChar() == '/')
            SkipComment(); // this will decide if it is a comment

        if (CurrentChar() == '"' || CurrentChar() == '\'')
        {
            // this is the case that match is inside a string!
            wxChar ch = CurrentChar();
            MoveToNextChar();
            SkipToChar(ch);
            MoveToNextChar();
            // don't move to next char below if concatenating strings (e.g. printf("" ""))
            if (CurrentChar() == '"' || CurrentChar() == '\'')
                noMove = true;
        }
        if (CurrentChar() == ch)
            ++count;
        else if (CurrentChar() == match)
            --count;
        if (!noMove)
            MoveToNextChar();
        if (count == 0)
            break;
    }
    if (IsEOF())
        return false;
    return true;
}
示例#13
0
bool Tokenizer::SkipWhiteSpace()
{
    // skip spaces, tabs, etc.
    while (CurrentChar() <= _T(' ') && MoveToNextChar()) // don't check EOF when MoveToNextChar already does, also replace isspace() which calls msvcrt.dll
        ;                                                // with a dirty hack:  CurrentChar() <= ' ' is "good enough" here
    if (IsEOF())
        return false;
    return true;
}
示例#14
0
void TextCutter::SkipToChar(const wxChar& ch)
{
	// skip everything until we find ch
	while (1)
	{
		while (!IsEOF() && CurrentChar() != ch && CurrentChar() != '\n')
			MoveToNextChar();
        break;
	}
}
示例#15
0
PreprocessorType Tokenizer::GetPreprocessorType()
{
    const unsigned int undoIndex = m_TokenIndex;
    const unsigned int undoLine = m_LineNumber;

    MoveToNextChar();
    while (SkipWhiteSpace() || SkipComment())
        ;

    const wxString token = DoGetToken();

    switch (token.Len())
    {
    case 2:
        if (token == TokenizerConsts::kw_if)
            return ptIf;
        break;

    case 4:
        if (token == TokenizerConsts::kw_else)
            return ptElse;
        else if (token == TokenizerConsts::kw_elif)
            return ptElif;
        break;

    case 5:
        if (token == TokenizerConsts::kw_ifdef)
            return ptIfdef;
        else if (token == TokenizerConsts::kw_endif)
            return ptEndif;
        break;

    case 6:
        if (token == TokenizerConsts::kw_ifndef)
            return ptIfndef;
        break;

    case 7:
        if (token == TokenizerConsts::kw_elifdef)
            return ptElifdef;
        break;

    case 8:
        if (token == TokenizerConsts::kw_elifndef)
            return ptElifndef;
        break;
    }

    m_TokenIndex = undoIndex;
    m_LineNumber = undoLine;
    return ptOthers;
}
示例#16
0
bool Tokenizer::SkipComment(bool skipWhiteAtEnd) // = true
{
    // C/C++ style comments
    bool is_comment = CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*');
    if (!is_comment)
        return true;

    bool cstyle = NextChar() == '*';
    MoveToNextChar(2);
    while (1)
    {
        if (!cstyle)
        {
            if (!SkipToEOL(false, true))
                return false;
            MoveToNextChar();
            break;
        }
        else
        {
            if (SkipToChar('/'))
            {
                if (PreviousChar() == '*')
                {
                    MoveToNextChar();
                    break;
                }
                MoveToNextChar();
            }
            else
                return false;
        }
    }
    if (IsEOF())
        return false;
    if (skipWhiteAtEnd && !SkipWhiteSpace())
        return false;
    return CurrentChar() == '/' ? SkipComment() : true; // handle chained comments
}
示例#17
0
uLONG VLexerFileInput::FindCommandStart()
{
	uLONG nSkipped = 0;
	UniChar nChar;
	while (HasMoreChars()) {
		nSkipped++;
		nChar = MoveToNextChar();
		if (nChar == CHAR_SEMICOLON)	break;
	}
	fCommandStart = fCurrentChar;

	return nSkipped;
}
示例#18
0
void Tokenizer::SkipToNextConditionPreprocessor()
{
    do
    {
        wxChar ch = CurrentChar();
        if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' '))
        {
            while (SkipWhiteSpace() || SkipString() || SkipComment())
                ;
            ch = CurrentChar();
        }

        if (ch == _T('#'))
        {
            const unsigned int undoIndex = m_TokenIndex;
            const unsigned int undoLine = m_LineNumber;

            MoveToNextChar();
            while (SkipWhiteSpace() || SkipComment())
                ;

            const wxChar current = CurrentChar();
            const wxChar next = NextChar();

            // #if
            if (current == _T('i') && next == _T('f'))
                SkipToEndConditionPreprocessor();

            // #else #elif #elifdef #elifndef #endif
            else if (current == _T('e') && (next == _T('l') || next == _T('n')))
            {
                m_TokenIndex = undoIndex;
                m_LineNumber = undoLine;
                break;
            }
        }
    }
    while (MoveToNextChar());
}
示例#19
0
bool Tokenizer::SkipToChar(const wxChar& ch)
{
    // skip everything until we find ch
    while(true)
    {
        while (CurrentChar() != ch && MoveToNextChar())  // don't check EOF when MoveToNextChar already does
            ;

        if (IsEOF())
            return false;

        if (PreviousChar() != '\\')
            break;
        else
        {
            // check for "\\"
            if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\')
                break;
        }
        MoveToNextChar();
    }
    return true;
}
示例#20
0
bool Tokenizer::SkipToEOL(bool nestBraces)
{
    TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"),
          wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
          PreviousChar(), NextChar(), nestBraces ? 1 : 0);

    // skip everything until we find EOL
    for (;;)
    {
        while (NotEOF() && CurrentChar() != '\n')
        {
            if (CurrentChar() == '/' && NextChar() == '*')
            {
                SkipComment();
                if (CurrentChar() == _T('\n'))
                    break;
            }

            if (nestBraces && CurrentChar() == _T('{'))
                ++m_NestLevel;
            else if (nestBraces && CurrentChar() == _T('}'))
                --m_NestLevel;

            MoveToNextChar();
        }

        if (!IsBackslashBeforeEOL() || IsEOF())
            break;
        else
            MoveToNextChar();
    }

    TRACE(_T("SkipToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
          m_LineNumber, CurrentChar(), PreviousChar(), NextChar());

    return NotEOF();
}
示例#21
0
void Tokenizer::SpliteArguments(wxArrayString& results)
{
    while (SkipWhiteSpace() || SkipComment())
        ;
    if (CurrentChar() != _T('('))
        return;

    MoveToNextChar(); // Skip the '('
    int level = 1; // include '('

    wxString piece;
    while (NotEOF())
    {
        wxString token = DoGetToken();
        if (token.IsEmpty())
            break;

        if (token == _T("("))
            ++level;
        else if (token == _T(")"))
            --level;

        if (token == _T(","))
        {
            results.Add(piece);
            piece.Clear();
        }
        else if (level != 0)
        {
            if (!piece.IsEmpty() && piece.Last() > _T(' '))
                piece << _T(" ");
            piece << token;
        }

        if (level == 0)
        {
            if (!piece.IsEmpty())
                results.Add(piece);
            break;
        }

        while (SkipWhiteSpace() || SkipComment())
            ;
    }
}
示例#22
0
bool Tokenizer::SkipToInlineCommentEnd()
{
    TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
          wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
          PreviousChar(), NextChar());

    // skip everything until we find EOL
    while (true)
    {
        SkipToChar(_T('\n'));
        if (!IsBackslashBeforeEOL() || IsEOF())
            break;
        else
            MoveToNextChar();
    }

    TRACE(_T("SkipToInlineCommentEnd(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c',")
          _T(" NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar());

    return NotEOF();
}
示例#23
0
void TextCutter::SkipToEOL()
{
    while (!IsEOF() && CurrentChar() != '\n')
        MoveToNextChar();
}
示例#24
0
//vfc add bGetValue
bool Tokenizer::SkipUnwanted(bool bGetValue) 
{
    while (CurrentChar() == '#' ||
            (!m_IsOperator && CurrentChar() == '=') ||
            (!m_IsOperator && CurrentChar() == '[') ||
            CurrentChar() == '?' ||
            (CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*') ))
    {
        bool skipPreprocessor = false; // used for #include
        while (m_Buffer.Mid(m_TokenIndex, 2) == _T("//") ||
                m_Buffer.Mid(m_TokenIndex, 2) == _T("/*"))
        {
            // C/C++ style comments
            SkipComment();
            if (IsEOF())
                return false;
            if (!SkipWhiteSpace())
                return false;
        }

        while (CurrentChar() == '#')
        {
            // preprocessor directives
            // we only care for #include and #define, for now
            unsigned int backupIdx = m_TokenIndex;
            MoveToNextChar();
            SkipWhiteSpace();
            if ((CurrentChar() == 'i' && NextChar() == 'n') || // in(clude)
                (CurrentChar() == 'i' && NextChar() == 'f') || // if(|def|ndef)
                (CurrentChar() == 'e' && NextChar() == 'l') || // el(se|if)
                (CurrentChar() == 'e' && NextChar() == 'n') || // en(dif)
                (m_Options.wantPreprocessor && CurrentChar() == 'd' && NextChar() == 'e')) // de(fine)
            {
                // ok, we have something like #in(clude)
                m_LastWasPreprocessor = true;
                m_LastPreprocessor.Clear();
                m_TokenIndex = backupIdx; // keep #
                skipPreprocessor = true;
                break;
            }
            else
            {
                // skip the rest for now...
                SkipToEOL(false);
                if (!SkipWhiteSpace())
                    return false;
            }
            if (skipPreprocessor)
                break;
        }

        while (CurrentChar() == '[')
        {
            // array subscripts
            // skip them for now...
            SkipBlock('[');
            if (!SkipWhiteSpace())
                return false;
        }

        while (CurrentChar() == '=')
        {
            // skip assignments
            // TODO: what happens with operators?
			if (bGetValue == true)
			{
				MoveToNextChar();
				SkipWhiteSpace();
				return true;
			}
	        else if (!SkipToOneOfChars(_T(",;}"), true))
                return false;
        }

        while (CurrentChar() == '?')
        {
            // skip "condition ? true : false"
            // TODO: what happens with operators?
            if (!SkipToOneOfChars(_T(";}")))
                return false;
        }
        if (skipPreprocessor)
            break;
    }
    return true;
}
示例#25
0
//vfc add bGetValue
wxString Tokenizer::DoGetToken(bool bGetValue, bool bTemplate)
{
    if (IsEOF())
        return wxEmptyString;

    if (!SkipWhiteSpace())
        return wxEmptyString;

    if (m_SkipUnwantedTokens && !SkipUnwanted(bGetValue))
        return wxEmptyString;

    // if m_SkipUnwantedTokens is false, we need to handle comments here too
    if (!m_SkipUnwantedTokens)
        SkipComment();

    int start = m_TokenIndex;
    wxString m_Str;
    wxChar c = CurrentChar();

    if (c == '_' || wxIsalpha(c))
    {
        // keywords, identifiers, etc.

        // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF
        while (  ( CurrentChar() == '_' ||
                   wxIsalnum(CurrentChar()) ) && MoveToNextChar()  )
        ;

        if (IsEOF())
            return wxEmptyString;
        m_Str = m_Buffer.Mid(start, m_TokenIndex - start);
        m_IsOperator = m_Str.IsSameAs(TokenizerConsts::operator_str);
    }
#ifdef __WXMSW__ // This is a Windows only bug!
    else if (c == 178  || c == 179 || c == 185) // fetch ?and ?
    {
        m_Str = c;
        MoveToNextChar();
    }
#endif
    else if (wxIsdigit(CurrentChar()))
    {
        // numbers
        while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl")))
            MoveToNextChar();
        if (IsEOF())
            return wxEmptyString;
        m_Str = m_Buffer.Mid(start, m_TokenIndex - start);
        m_IsOperator = false;
    }
    else if (CurrentChar() == '"' ||
            CurrentChar() == '\'')
    {
        // string, char, etc.
        wxChar match = CurrentChar();
        MoveToNextChar();  // skip starting ' or "
        if (!SkipToChar(match))
            return wxEmptyString;
        MoveToNextChar(); // skip ending ' or "
        m_Str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
    else if (CurrentChar() == ':')
    {
        if (NextChar() == ':')
        {
            MoveToNextChar();
            MoveToNextChar();
            m_Str.assign(TokenizerConsts::colon_colon); // this only copies a pointer, but operator= allocates memory and does a memcpy!
        }
        else
        {
            MoveToNextChar();
            m_Str.assign(TokenizerConsts::colon);
        }
    }
	else if (CurrentChar() == '<' && bTemplate)
	{
		wxChar match = _T('>');
		MoveToNextChar();
		if (!SkipToOneOfChars(_T(">\r\n")),false)
			return wxEmptyString;
		MoveToNextChar();
		wxString tmp = m_Buffer.Mid(start+1,m_TokenIndex-start-2);
		tmp.Trim();
        m_Str = _T("<");
		m_Str += tmp;
		m_Str += _T(">");//m_Buffer.Mid(start, m_TokenIndex - start);
	}
    else if (CurrentChar() == '(')
    {
        m_IsOperator = false;
        // skip blocks () []
        if (!SkipBlock(CurrentChar()))
            return wxEmptyString;
        wxString tmp = m_Buffer.Mid(start, m_TokenIndex - start);
//        tmp.Replace(_T("\t"), _T(" ")); // replace tabs with spaces
//        tmp.Replace(_T("\n"), _T(" ")); // replace LF with spaces
//        tmp.Replace(_T("\r"), _T(" ")); // replace CR with spaces
        { // this is much faster:
            size_t i;
            while((i = tmp.find_first_of(TokenizerConsts::tabcrlf)) != wxString::npos)
                //tmp[i] = _T(' ');
				tmp.SetAt(i,_T(' '));
        }
        // fix-up arguments (remove excessive spaces/tabs/newlines)
        for (unsigned int i = 0; i < tmp.Length() - 1; ++i)
        {
            //skip spaces before '=' and ','
            if (tmp.GetChar(i) == ' ' && (tmp.GetChar(i + 1) == ',' || tmp.GetChar(i + 1) == '='))
				continue;

            if (tmp.GetChar(i) == '/' && tmp.GetChar(i + 1) == '*')
            {
                // skip C comments
                i += 2;
                while (i < tmp.Length() - 1)
                {
                    if (tmp.GetChar(i) == '*' && tmp.GetChar(i + 1) == '/')
                        break;
                    ++i;
                }
                if (i >= tmp.Length() - 1 || tmp.GetChar(i + 1) != '/')
                    continue; // we failed...
                i += 2;
            }
            else if (tmp.GetChar(i) == '=')
            {
                // skip default assignments
                ++i;
                int level = 0; // nesting parenthesis
                while (i < tmp.Length())
                {
                    if (tmp.GetChar(i) == '(')
                        ++level;
                    else if (tmp.GetChar(i) == ')')
                        --level;
                    if ((tmp.GetChar(i) == ',' && level == 0) ||
                        (tmp.GetChar(i) == ')' && level < 0))
                        break;
                    ++i;
                }
                if (i < tmp.Length() && tmp.GetChar(i) == ',')
                    --i;
                continue; // we are done here
            }

            if (i < tmp.Length() - 1)
            {
                if ((tmp.GetChar(i)     == ' ') && (tmp.GetChar(i + 1) == ' '))
                    continue; // skip excessive spaces

                // in case of c-style comments "i" might already be tmp.Length()
                // thus do only add the current char otherwise.
                // otherwise the following statement:
                // m_Str << _T(')');
                // below would add another closing bracket.
                m_Str << tmp.GetChar(i);
            }
        }
        m_Str << _T(')'); // add closing parenthesis (see "i < tmp.Length() - 1" in previous "for")
//        m_Str.Replace(_T("  "), _T(" ")); // replace two-spaces with single-space (introduced if it skipped comments or assignments)
//        m_Str.Replace(_T("( "), _T("("));
//        m_Str.Replace(_T(" )"), _T(")"));
        //Str.Replace is massive overkill here since it has to allocate one new block per replacement
        CompactSpaces(m_Str);
    }
    else
    {
        if (CurrentChar() == '{')
            ++m_NestLevel;
        else if (CurrentChar() == '}')
            --m_NestLevel;
        m_Str = CurrentChar();
        MoveToNextChar();
    }

    if (m_LastWasPreprocessor && !m_Str.IsSameAs(_T("#")) && !m_LastPreprocessor.IsSameAs(_T("#")))
    {
        if (!m_LastPreprocessor.IsSameAs(TokenizerConsts::include_str))
        {
            // except for #include and #if[[n]def], all other preprocessor directives need only
            // one word exactly after the directive, e.g. #define THIS_WORD
            SkipToEOL();
        }
        m_LastPreprocessor.Clear();
    }

    if (m_LastWasPreprocessor)
        m_LastPreprocessor << m_Str;
    m_LastWasPreprocessor = false;

    return m_Str;
}
示例#26
0
void Tokenizer::ReadParentheses(wxString& str)
{
    static const size_t maxBufferLen = 4093;
    wxChar buffer[maxBufferLen + 3];
    buffer[0] = _T('$'); // avoid segfault error
    wxChar* realBuffer = buffer + 1;
    wxChar* p = realBuffer;

    int level = 0;

    while (NotEOF())
    {
        while (SkipComment())
            ;
        wxChar ch = CurrentChar();

        while (ch == _T('#')) // do not use if
        {
            const PreprocessorType type = GetPreprocessorType();
            if (type == ptOthers)
                break;
            HandleConditionPreprocessor(type);
            ch = CurrentChar();
        }

        const unsigned int startIndex = m_TokenIndex;

        switch(ch)
        {
        case _T('('):
            {
                ++level;
                *p = ch;
                ++p;
            }
            break;

        case _T(')'):
            {
                if (*(p - 1) <= _T(' '))
                    --p;
                --level;
                *p = ch;
                ++p;
            }
            break;

        case _T('\''):
        case _T('"'):
            {
                MoveToNextChar();
                SkipToStringEnd(ch);
                MoveToNextChar();
                const size_t writeLen = m_TokenIndex - startIndex;
                const size_t usedLen = p - realBuffer;
                if (usedLen + writeLen > maxBufferLen)
                {
                    if (writeLen > maxBufferLen)
                    {
                        TRACE(_T("ReadParentheses, Catch Exception 1: %d"), writeLen);
                        return;
                    }

                    if (p != realBuffer)
                    {
                        str.Append(realBuffer, usedLen);
                        p = realBuffer;
                    }

                    str.Append((const wxChar*)m_Buffer + startIndex, writeLen);
                }
                else
                {
                    memcpy(p, (const wxChar*)m_Buffer + startIndex, writeLen * sizeof(wxChar));
                    p += writeLen;
                }

                continue;
            }
            break;

        case _T(','):
            {
                if (*(p - 1) <= _T(' '))
                    --p;

                *p = _T(',');
                *++p = _T(' ');
                ++p;
            }
            break;

        case _T('*'):
            {
                if (*(p - 1) <= _T(' '))
                    --p;

                *p = _T('*');
                *++p = _T(' ');
                ++p;
            }
            break;

        case _T('&'):
            {
                if (*(p - 1) <= _T(' '))
                    --p;

                *p = _T('&');
                *++p = _T(' ');
                ++p;
            }
            break;

        case _T('='):
            {
                if (*(p - 1) <= _T(' '))
                {
                    *p = _T('=');
                    *++p = _T(' ');
                    ++p;
                }
                else
                {
                    switch (*(p - 1))
                    {
                    case _T('='):
                    case _T('!'):
                    case _T('>'):
                    case _T('<'):
                        {
                            *p = _T('=');
                            *++p = _T(' ');
                            ++p;
                        }

                    default:
                        {
                            *p = _T(' ');
                            *++p = _T('=');
                            *++p = _T(' ');
                            ++p;
                        }
                    }
                }
            }
            break;

        case _T(' '):
            {
                if (*(p - 1) != _T(' ') && *(p - 1) != _T('('))
                {
                    *p = _T(' ');
                    ++p;
                }
            }
            break;

        case _T('\r'):
        case _T('\t'):
            break;

        case _T('\n'): // we need keep the \n for records paras correct position
            if (*(p - 1) == _T(' '))
                --p;
            if (*(p - 1) != _T('('))
            {
                *p = ch;
                ++p;
            }
            break;

        default:
            {
                *p = ch;
                ++p;
            }
            break;
        }

        if (p >= realBuffer + maxBufferLen)
        {
            str.Append(realBuffer, p - realBuffer);
            p = realBuffer;
        }

        MoveToNextChar();

        if (level == 0)
            break;
    }

    if (p > realBuffer)
        str.Append(realBuffer, p - realBuffer);
    TRACE(_T("ReadParentheses(): %s, line=%d"), str.wx_str(), m_LineNumber);
    if (str.Len() > 512)
        TRACE(_T("ReadParentheses: Catch Exception 2?: %d"), str.Len());
}
示例#27
0
wxString Tokenizer::ReadToEOL(bool nestBraces, bool stripUnneeded)
{
    if (stripUnneeded)
    {
        TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"),
              wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
              PreviousChar(), NextChar(), nestBraces ? 1 : 0);

        static const size_t maxBufferLen = 4094;
        wxChar buffer[maxBufferLen + 2];
        wxChar* p = buffer;
        wxString str;

        for (;;)
        {
            while (NotEOF() && CurrentChar() != _T('\n'))
            {
                while (SkipComment())
                    ;

                const wxChar ch = CurrentChar();
                if (ch == _T('\n'))
                    break;

                if (ch <= _T(' ') && (p == buffer || *(p - 1) == ch))
                {
                    MoveToNextChar();
                    continue;
                }

                *p = ch;
                ++p;

                if (p >= buffer + maxBufferLen)
                {
                    str.Append(buffer, p - buffer);
                    p = buffer;
                }

                if (nestBraces)
                {
                    if (ch == _T('{'))
                        ++m_NestLevel;
                    else if (ch == _T('}'))
                        --m_NestLevel;
                }

                MoveToNextChar();
            }

            if (!IsBackslashBeforeEOL() || IsEOF())
                break;
            else
            {
                while (p > buffer && *(--p) <= _T(' '))
                    ;
                MoveToNextChar();
            }
        }

        while (p > buffer && *(p - 1) <= _T(' '))
            --p;

        if (p > buffer)
            str.Append(buffer, p - buffer);

        TRACE(_T("ReadToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
              m_LineNumber, CurrentChar(), PreviousChar(), NextChar());
        TRACE(_T("ReadToEOL(): %s"), str.wx_str());

        return str;
    }
    else
    {
        const unsigned int idx = m_TokenIndex;
        SkipToEOL(nestBraces);
        return m_Buffer.Mid(idx, m_TokenIndex - idx);
    }
}
示例#28
0
wxString Tokenizer::DoGetToken()
{
    int start = m_TokenIndex;
    bool needReplace = false;

    wxString str;
    wxChar   c = CurrentChar();

    if (c == '_' || wxIsalpha(c))
    {
        // keywords, identifiers, etc.

        // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF
        while (    ( (c == '_') || (wxIsalnum(c)) )
               &&  MoveToNextChar() )
            c = CurrentChar(); // repeat

        if (IsEOF())
            return wxEmptyString;

        needReplace = true;
        str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
#ifdef __WXMSW__ // This is a Windows only bug!
    // fetch non-English characters, see more details in: http://forums.codeblocks.org/index.php/topic,11387.0.html
    else if (c == 178 || c == 179 || c == 185)
    {
        str = c;
        MoveToNextChar();
    }
#endif
    else if (wxIsdigit(c))
    {
        // numbers
        while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl")))
            MoveToNextChar();

        if (IsEOF())
            return wxEmptyString;

        str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
    else if ( (c == '"') || (c == '\'') )
    {
        SkipString();
        //Now, we are after the end of the C-string, so return the whole string as a token.
        str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
    else if (c == ':')
    {
        if (NextChar() == ':')
        {
            MoveToNextChar();
            MoveToNextChar();
            // this only copies a pointer, but operator= allocates memory and does a memcpy!
            str.assign(TokenizerConsts::colon_colon);
        }
        else
        {
            MoveToNextChar();
            str.assign(TokenizerConsts::colon);
        }
    }
    else if (c == '<')
    {
        if (m_State&tsSingleAngleBrace)
        {
            if ( !SkipToOneOfChars(  _T(">"), true, true)   )
                return wxEmptyString;
            MoveToNextChar();
            str= m_Buffer.Mid(start, m_TokenIndex - start);
        }
        else
        {
            str = c;
            MoveToNextChar();
        }
    }
    else if (c == '(')
    {
        if (m_State & tsReadRawExpression)
        {
            str = c;
            MoveToNextChar();
        }
        else
        {
            ReadParentheses(str);
        }
    }
    else
    {
        if      (c == '{')
            ++m_NestLevel;
        else if (c == '}')
            --m_NestLevel;

        str = c;
        MoveToNextChar();
    }

    if (m_FirstRemainingLength != 0 && m_BufferLen - m_FirstRemainingLength < m_TokenIndex)
    {
        m_FirstRemainingLength = 0;
        m_IsReplaceParsing = false;
        m_RepeatReplaceCount = 0;
    }

    if (needReplace && m_State ^ tsReadRawExpression)
        MacroReplace(str);

    return str;
}
示例#29
0
void Tokenizer::MacroReplace(wxString& str)
{
    if (m_IsReplaceParsing)
    {
        const int id = m_TokensTree->TokenExists(str, -1, tkPreprocessor);
        if (id != -1)
        {
            Token* tk = m_TokensTree->at(id);
            if (tk)
            {
                bool replaced = false;
                if (!tk->m_Args.IsEmpty())
                    replaced = ReplaceMacroActualContext(tk, false);
                else if (tk->m_Type != tk->m_Name)
                    replaced = ReplaceBufferForReparse(tk->m_Type, false);
                if (replaced || tk->m_Type.IsEmpty())
                {
                    SkipUnwanted();
                    str = DoGetToken();
                }
            }
        }
    }

    wxStringHashMap::const_iterator it = s_Replacements.find(str);
    if (it == s_Replacements.end())
        return;

    TRACE(_T("MacroReplace() : Replacing '%s' with '%s' (file='%s', line='%d')."), it->first.wx_str(),
          it->second.wx_str(), m_Filename.wx_str(), m_LineNumber);

    if (it->second.IsEmpty())
    {
        SkipUnwanted();
        str = DoGetToken();
    }
    else if (it->second[0] == _T('+'))
    {
        while (SkipWhiteSpace() || SkipComment())
            ;
        DoGetToken(); // eat (...)
        wxString target = (const wxChar*)it->second + 1;
        if (target.IsEmpty())
        {
            while (SkipWhiteSpace() || SkipComment())
                ;
            str = DoGetToken();
        }
        else if (target != str && ReplaceBufferForReparse(target, false))
            str = DoGetToken();
    }
    else if (it->second[0] == _T('-'))
    {
        wxString end((const wxChar*)it->second + 1);
        if (end.IsEmpty())
            return;

        while (NotEOF())
        {
            while (SkipComment() && SkipWhiteSpace())
                ;
            if (CurrentChar() == end[0])
            {
                if (DoGetToken() == end)
                    break;
            }
            else
                MoveToNextChar();
        }

        // eat ()
        SkipUnwanted();
        str = DoGetToken();
        if (str[0] == _T('('))
        {
            SkipUnwanted();
            str = DoGetToken();
        }
    }
    else
    {
        if (it->second != str && ReplaceBufferForReparse(it->second, false))
            str = DoGetToken();
    }
}