Esempio n. 1
0
bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting)
{
    // skip everything until we find any one of chars
    while (1)
    {
        while (NotEOF() && !CharInString(CurrentChar(), chars))
        {
            if (CurrentChar() == '"' || CurrentChar() == '\'')
            {
                // this is the case that match is inside a string!
                wxChar ch = CurrentChar();
                MoveToNextChar();
                SkipToChar(ch);
            }
            MoveToNextChar();

            // make sure we skip comments
            if (CurrentChar() == '/')
                SkipComment(); // this will decide if it is a comment

            // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar)
            // must skip <foo> and immediately after (bar))
            // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird
            // parsing results
            bool done = false;
            while (supportNesting && !done)
            {
                switch (CurrentChar())
                {
                    case '{': SkipBlock('{'); break;
                    case '(': SkipBlock('('); break;
                    case '[': SkipBlock('['); break;
                    case '<': // don't skip if << operator
                        if (NextChar() == '<')
                            MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<');
                        else
                            SkipBlock('<');
                        break;
                    default: done = true; break;
                }
            }
        }
        if (PreviousChar() != '\\')
            break;
        else
        {
            // check for "\\"
            if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\')
                break;
        }
        MoveToNextChar();
    }
    if (IsEOF())
        return false;
    return true;
}
Esempio n. 2
0
void TextCutter::GetChunk(wxString& chunk, bool& isWord)
{
    isWord = false;
    chunk = wxEmptyString;
    if (IsEOF())
        return;

    unsigned int start = m_CurIdx;
    if (isalpha(CurrentChar()) || CurrentChar() == '_')
    {
        while (!IsEOF() &&
               (isalnum(CurrentChar()) || CurrentChar() == '_'))
            MoveToNextChar();
        chunk = m_Text.Mid(start, m_CurIdx - start);
        isWord = true;
    }
    else
    {
        SkipWhiteSpace();
        SkipUnwanted();
        if (start != m_CurIdx)
        {
            chunk = m_Text.Mid(start, m_CurIdx - start);
            return;
        }

        if (isdigit(CurrentChar()))
        {
            // numbers
            while (!IsEOF() && CharInString(CurrentChar(), "0123456789.abcdefABCDEFXxLl"))
                MoveToNextChar();

        }
        else if (CurrentChar() == '"' ||
                CurrentChar() == '\'')
        {
            // string, char, etc.
            wxChar match = CurrentChar();
            MoveToNextChar();  // skip starting ' or "
            SkipToChar(match);
            MoveToNextChar(); // skip ending ' or "
        }
        else
        {
            MoveToNextChar();
        }
        chunk = m_Text.Mid(start, m_CurIdx - start);
    }
    return;
}
Esempio n. 3
0
// if we really move forward, return true, which means we have the new m_TokenIndex
// if we stay here, return false
bool Tokenizer::SkipComment()
{
    if (IsEOF())
        return false;

    bool cstyle;            // C or C++ style comments

    //check the comment prompt
    if (CurrentChar() == '/')
    {
        if      (NextChar() == '*')
            cstyle = true;
        else if (NextChar() == '/')
            cstyle = false;
        else
            return false; // Not a comment, return false;
    }
    else
        return false;     // Not a comment, return false;

    TRACE(_T("SkipComment() : Start from line = %d"), m_LineNumber);
    MoveToNextChar(2);    // Skip the comment prompt

    // Here, we are in the comment body
    while (true)
    {
        if (cstyle)      // C style comment
        {
            SkipToChar('/');
            if (PreviousChar() == '*') // end of a C style comment
            {
                MoveToNextChar();
                break;
            }
            if (!MoveToNextChar())
                break;
        }
        else             // C++ style comment
        {
            TRACE(_T("SkipComment() : Need to call SkipToEOL() here at line = %d"), m_LineNumber);
            SkipToInlineCommentEnd();
            break;
        }
    }

    return true;
}
Esempio n. 4
0
bool Tokenizer::SkipBlock(const wxChar& ch)
{
    // skip blocks () [] {} <>
    wxChar match;
    switch (ch)
    {
        case '(': match = ')'; break;
        case '[': match = ']'; break;
        case '{': match = '}'; break;
        case '<': match = '>'; break;
        default : return false;
    }

    MoveToNextChar();
    int count = 1; // counter for nested blocks (xxx())
    while (NotEOF())
    {
        bool noMove = false;
        if (CurrentChar() == '/')
            SkipComment(); // this will decide if it is a comment

        if (CurrentChar() == '"' || CurrentChar() == '\'')
        {
            // this is the case that match is inside a string!
            wxChar ch = CurrentChar();
            MoveToNextChar();
            SkipToChar(ch);
            MoveToNextChar();
            // don't move to next char below if concatenating strings (e.g. printf("" ""))
            if (CurrentChar() == '"' || CurrentChar() == '\'')
                noMove = true;
        }
        if (CurrentChar() == ch)
            ++count;
        else if (CurrentChar() == match)
            --count;
        if (!noMove)
            MoveToNextChar();
        if (count == 0)
            break;
    }
    if (IsEOF())
        return false;
    return true;
}
Esempio n. 5
0
bool Tokenizer::SkipToInlineCommentEnd()
{
    TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
          wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
          PreviousChar(), NextChar());

    // skip everything until we find EOL
    while (true)
    {
        SkipToChar(_T('\n'));
        if (!IsBackslashBeforeEOL() || IsEOF())
            break;
        else
            MoveToNextChar();
    }

    TRACE(_T("SkipToInlineCommentEnd(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c',")
          _T(" NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar());

    return NotEOF();
}
Esempio n. 6
0
bool Tokenizer::SkipComment(bool skipWhiteAtEnd) // = true
{
    // C/C++ style comments
    bool is_comment = CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*');
    if (!is_comment)
        return true;

    bool cstyle = NextChar() == '*';
    MoveToNextChar(2);
    while (1)
    {
        if (!cstyle)
        {
            if (!SkipToEOL(false, true))
                return false;
            MoveToNextChar();
            break;
        }
        else
        {
            if (SkipToChar('/'))
            {
                if (PreviousChar() == '*')
                {
                    MoveToNextChar();
                    break;
                }
                MoveToNextChar();
            }
            else
                return false;
        }
    }
    if (IsEOF())
        return false;
    if (skipWhiteAtEnd && !SkipWhiteSpace())
        return false;
    return CurrentChar() == '/' ? SkipComment() : true; // handle chained comments
}
Esempio n. 7
0
//vfc add bGetValue
wxString Tokenizer::DoGetToken(bool bGetValue, bool bTemplate)
{
    if (IsEOF())
        return wxEmptyString;

    if (!SkipWhiteSpace())
        return wxEmptyString;

    if (m_SkipUnwantedTokens && !SkipUnwanted(bGetValue))
        return wxEmptyString;

    // if m_SkipUnwantedTokens is false, we need to handle comments here too
    if (!m_SkipUnwantedTokens)
        SkipComment();

    int start = m_TokenIndex;
    wxString m_Str;
    wxChar c = CurrentChar();

    if (c == '_' || wxIsalpha(c))
    {
        // keywords, identifiers, etc.

        // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF
        while (  ( CurrentChar() == '_' ||
                   wxIsalnum(CurrentChar()) ) && MoveToNextChar()  )
        ;

        if (IsEOF())
            return wxEmptyString;
        m_Str = m_Buffer.Mid(start, m_TokenIndex - start);
        m_IsOperator = m_Str.IsSameAs(TokenizerConsts::operator_str);
    }
#ifdef __WXMSW__ // This is a Windows only bug!
    else if (c == 178  || c == 179 || c == 185) // fetch ?and ?
    {
        m_Str = c;
        MoveToNextChar();
    }
#endif
    else if (wxIsdigit(CurrentChar()))
    {
        // numbers
        while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl")))
            MoveToNextChar();
        if (IsEOF())
            return wxEmptyString;
        m_Str = m_Buffer.Mid(start, m_TokenIndex - start);
        m_IsOperator = false;
    }
    else if (CurrentChar() == '"' ||
            CurrentChar() == '\'')
    {
        // string, char, etc.
        wxChar match = CurrentChar();
        MoveToNextChar();  // skip starting ' or "
        if (!SkipToChar(match))
            return wxEmptyString;
        MoveToNextChar(); // skip ending ' or "
        m_Str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
    else if (CurrentChar() == ':')
    {
        if (NextChar() == ':')
        {
            MoveToNextChar();
            MoveToNextChar();
            m_Str.assign(TokenizerConsts::colon_colon); // this only copies a pointer, but operator= allocates memory and does a memcpy!
        }
        else
        {
            MoveToNextChar();
            m_Str.assign(TokenizerConsts::colon);
        }
    }
	else if (CurrentChar() == '<' && bTemplate)
	{
		wxChar match = _T('>');
		MoveToNextChar();
		if (!SkipToOneOfChars(_T(">\r\n")),false)
			return wxEmptyString;
		MoveToNextChar();
		wxString tmp = m_Buffer.Mid(start+1,m_TokenIndex-start-2);
		tmp.Trim();
        m_Str = _T("<");
		m_Str += tmp;
		m_Str += _T(">");//m_Buffer.Mid(start, m_TokenIndex - start);
	}
    else if (CurrentChar() == '(')
    {
        m_IsOperator = false;
        // skip blocks () []
        if (!SkipBlock(CurrentChar()))
            return wxEmptyString;
        wxString tmp = m_Buffer.Mid(start, m_TokenIndex - start);
//        tmp.Replace(_T("\t"), _T(" ")); // replace tabs with spaces
//        tmp.Replace(_T("\n"), _T(" ")); // replace LF with spaces
//        tmp.Replace(_T("\r"), _T(" ")); // replace CR with spaces
        { // this is much faster:
            size_t i;
            while((i = tmp.find_first_of(TokenizerConsts::tabcrlf)) != wxString::npos)
                //tmp[i] = _T(' ');
				tmp.SetAt(i,_T(' '));
        }
        // fix-up arguments (remove excessive spaces/tabs/newlines)
        for (unsigned int i = 0; i < tmp.Length() - 1; ++i)
        {
            //skip spaces before '=' and ','
            if (tmp.GetChar(i) == ' ' && (tmp.GetChar(i + 1) == ',' || tmp.GetChar(i + 1) == '='))
				continue;

            if (tmp.GetChar(i) == '/' && tmp.GetChar(i + 1) == '*')
            {
                // skip C comments
                i += 2;
                while (i < tmp.Length() - 1)
                {
                    if (tmp.GetChar(i) == '*' && tmp.GetChar(i + 1) == '/')
                        break;
                    ++i;
                }
                if (i >= tmp.Length() - 1 || tmp.GetChar(i + 1) != '/')
                    continue; // we failed...
                i += 2;
            }
            else if (tmp.GetChar(i) == '=')
            {
                // skip default assignments
                ++i;
                int level = 0; // nesting parenthesis
                while (i < tmp.Length())
                {
                    if (tmp.GetChar(i) == '(')
                        ++level;
                    else if (tmp.GetChar(i) == ')')
                        --level;
                    if ((tmp.GetChar(i) == ',' && level == 0) ||
                        (tmp.GetChar(i) == ')' && level < 0))
                        break;
                    ++i;
                }
                if (i < tmp.Length() && tmp.GetChar(i) == ',')
                    --i;
                continue; // we are done here
            }

            if (i < tmp.Length() - 1)
            {
                if ((tmp.GetChar(i)     == ' ') && (tmp.GetChar(i + 1) == ' '))
                    continue; // skip excessive spaces

                // in case of c-style comments "i" might already be tmp.Length()
                // thus do only add the current char otherwise.
                // otherwise the following statement:
                // m_Str << _T(')');
                // below would add another closing bracket.
                m_Str << tmp.GetChar(i);
            }
        }
        m_Str << _T(')'); // add closing parenthesis (see "i < tmp.Length() - 1" in previous "for")
//        m_Str.Replace(_T("  "), _T(" ")); // replace two-spaces with single-space (introduced if it skipped comments or assignments)
//        m_Str.Replace(_T("( "), _T("("));
//        m_Str.Replace(_T(" )"), _T(")"));
        //Str.Replace is massive overkill here since it has to allocate one new block per replacement
        CompactSpaces(m_Str);
    }
    else
    {
        if (CurrentChar() == '{')
            ++m_NestLevel;
        else if (CurrentChar() == '}')
            --m_NestLevel;
        m_Str = CurrentChar();
        MoveToNextChar();
    }

    if (m_LastWasPreprocessor && !m_Str.IsSameAs(_T("#")) && !m_LastPreprocessor.IsSameAs(_T("#")))
    {
        if (!m_LastPreprocessor.IsSameAs(TokenizerConsts::include_str))
        {
            // except for #include and #if[[n]def], all other preprocessor directives need only
            // one word exactly after the directive, e.g. #define THIS_WORD
            SkipToEOL();
        }
        m_LastPreprocessor.Clear();
    }

    if (m_LastWasPreprocessor)
        m_LastPreprocessor << m_Str;
    m_LastWasPreprocessor = false;

    return m_Str;
}