bool Tokenizer::SkipToEOL(bool nestBraces, bool skippingComment) { // skip everything until we find EOL while (1) { while (NotEOF() && CurrentChar() != '\n') { if (CurrentChar() == '/' && NextChar() == '*') { SkipComment(false); // don't skip whitespace after the comment if (skippingComment && CurrentChar() == '\n') { continue; // early exit from the loop } } if (nestBraces && CurrentChar() == _T('{')) ++m_NestLevel; else if (nestBraces && CurrentChar() == _T('}')) --m_NestLevel; MoveToNextChar(); } wxChar last = PreviousChar(); // if DOS line endings, we 've hit \r and we skip to \n... if (last == '\r') last = m_Buffer.GetChar(m_TokenIndex - 2); if (IsEOF() || last != '\\') break; else MoveToNextChar(); } if (IsEOF()) return false; return true; }
bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting) { // skip everything until we find any one of chars while (1) { while (NotEOF() && !CharInString(CurrentChar(), chars)) { if (CurrentChar() == '"' || CurrentChar() == '\'') { // this is the case that match is inside a string! wxChar ch = CurrentChar(); MoveToNextChar(); SkipToChar(ch); } MoveToNextChar(); // make sure we skip comments if (CurrentChar() == '/') SkipComment(); // this will decide if it is a comment // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar) // must skip <foo> and immediately after (bar)) // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird // parsing results bool done = false; while (supportNesting && !done) { switch (CurrentChar()) { case '{': SkipBlock('{'); break; case '(': SkipBlock('('); break; case '[': SkipBlock('['); break; case '<': // don't skip if << operator if (NextChar() == '<') MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<'); else SkipBlock('<'); break; default: done = true; break; } } } if (PreviousChar() != '\\') break; else { // check for "\\" if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\') break; } MoveToNextChar(); } if (IsEOF()) return false; return true; }
bool Tokenizer::SkipToInlineCommentEnd() { TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"), wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(), PreviousChar(), NextChar()); // skip everything until we find EOL while (true) { SkipToChar(_T('\n')); if (!IsBackslashBeforeEOL() || IsEOF()) break; else MoveToNextChar(); } TRACE(_T("SkipToInlineCommentEnd(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c',") _T(" NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar()); return NotEOF(); }
bool Tokenizer::SkipToEOL(bool nestBraces) { TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"), wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(), PreviousChar(), NextChar(), nestBraces ? 1 : 0); // skip everything until we find EOL for (;;) { while (NotEOF() && CurrentChar() != '\n') { if (CurrentChar() == '/' && NextChar() == '*') { SkipComment(); if (CurrentChar() == _T('\n')) break; } if (nestBraces && CurrentChar() == _T('{')) ++m_NestLevel; else if (nestBraces && CurrentChar() == _T('}')) --m_NestLevel; MoveToNextChar(); } if (!IsBackslashBeforeEOL() || IsEOF()) break; else MoveToNextChar(); } TRACE(_T("SkipToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar()); return NotEOF(); }
// if we really move forward, return true, which means we have the new m_TokenIndex // if we stay here, return false bool Tokenizer::SkipComment() { if (IsEOF()) return false; bool cstyle; // C or C++ style comments //check the comment prompt if (CurrentChar() == '/') { if (NextChar() == '*') cstyle = true; else if (NextChar() == '/') cstyle = false; else return false; // Not a comment, return false; } else return false; // Not a comment, return false; TRACE(_T("SkipComment() : Start from line = %d"), m_LineNumber); MoveToNextChar(2); // Skip the comment prompt // Here, we are in the comment body while (true) { if (cstyle) // C style comment { SkipToChar('/'); if (PreviousChar() == '*') // end of a C style comment { MoveToNextChar(); break; } if (!MoveToNextChar()) break; } else // C++ style comment { TRACE(_T("SkipComment() : Need to call SkipToEOL() here at line = %d"), m_LineNumber); SkipToInlineCommentEnd(); break; } } return true; }
// only be called when we are in a C-string, // To check whether the current charactor is the real end of C-string // See SkipToStringEnd() for more details bool Tokenizer::IsEscapedChar() { // Easy: If previous char is not a backslash, too than it's surely escape'd if (PreviousChar() != '\\') return true; else { // check for multiple backslashes, e.g. "\\" unsigned int numBackslash = 2; // for sure we have at least two at this point while ( ((m_TokenIndex - numBackslash) >= 0) && ((m_TokenIndex - numBackslash) <= m_BufferLen) && (m_Buffer.GetChar(m_TokenIndex - numBackslash) == '\\') ) ++numBackslash; // another one... if ( (numBackslash%2) == 1) // number of backslashes (including current char) is odd return true; // eg: "\"" else // number of backslashes (including current char) is even return false; // eg: "\\"" } return false; }
bool Tokenizer::SkipComment(bool skipWhiteAtEnd) // = true { // C/C++ style comments bool is_comment = CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*'); if (!is_comment) return true; bool cstyle = NextChar() == '*'; MoveToNextChar(2); while (1) { if (!cstyle) { if (!SkipToEOL(false, true)) return false; MoveToNextChar(); break; } else { if (SkipToChar('/')) { if (PreviousChar() == '*') { MoveToNextChar(); break; } MoveToNextChar(); } else return false; } } if (IsEOF()) return false; if (skipWhiteAtEnd && !SkipWhiteSpace()) return false; return CurrentChar() == '/' ? SkipComment() : true; // handle chained comments }
bool Tokenizer::SkipToChar(const wxChar& ch) { // skip everything until we find ch while(true) { while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does ; if (IsEOF()) return false; if (PreviousChar() != '\\') break; else { // check for "\\" if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\') break; } MoveToNextChar(); } return true; }
// Reads from the cursor up to the end of the current logical line and returns
// what was read.
//
// nestBraces    - when true, m_NestLevel is incremented for every '{' and
//                 decremented for every '}' encountered.
// stripUnneeded - when false, the raw buffer text between the starting
//                 position and the position reached by SkipToEOL(nestBraces)
//                 is returned. When true, comments are skipped via
//                 SkipComment(), a blank character (anything <= ' ') is
//                 dropped when it is at the start of the pending output or
//                 repeats the character just stored, lines for which
//                 IsBackslashBeforeEOL() reports a continuation are joined,
//                 and trailing blanks are removed.
wxString Tokenizer::ReadToEOL(bool nestBraces, bool stripUnneeded)
{
    if (!stripUnneeded)
    {
        // Plain mode: return the raw text that SkipToEOL() moves across.
        const unsigned int idx = m_TokenIndex;
        SkipToEOL(nestBraces);
        return m_Buffer.Mid(idx, m_TokenIndex - idx);
    }

    TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"),
          wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber,
          CurrentChar(), PreviousChar(), NextChar(), nestBraces ? 1 : 0);

    // Characters are collected in a fixed-size scratch buffer that is flushed
    // into the result whenever it fills up.
    static const size_t maxBufferLen = 4094;
    wxChar buffer[maxBufferLen + 2];
    size_t used = 0; // number of pending characters in buffer
    wxString str;

    for (;;)
    {
        while (NotEOF() && CurrentChar() != _T('\n'))
        {
            while (SkipComment())
                ;

            const wxChar ch = CurrentChar();
            if (ch == _T('\n'))
                break;

            // A blank is redundant at the start of the pending output or when
            // it repeats the character stored just before it.
            const bool redundantBlank =
                ch <= _T(' ') && (used == 0 || buffer[used - 1] == ch);

            if (!redundantBlank)
            {
                buffer[used] = ch;
                ++used;

                if (used >= maxBufferLen)
                {
                    str.Append(buffer, used);
                    used = 0;
                }

                if (nestBraces)
                {
                    if (ch == _T('{'))
                        ++m_NestLevel;
                    else if (ch == _T('}'))
                        --m_NestLevel;
                }
            }

            MoveToNextChar();
        }

        if (!IsBackslashBeforeEOL() || IsEOF())
            break;

        // Continued line: walk back over the pending output. The
        // pre-decrement leaves `used` ON the first non-blank character found,
        // so that character is overwritten by the next one stored.
        // NOTE(review): presumably that character is the continuation
        // backslash itself - confirm against IsBackslashBeforeEOL().
        while (used > 0 && buffer[--used] <= _T(' '))
            ;
        MoveToNextChar();
    }

    // Drop trailing blanks, then flush whatever is still pending.
    while (used > 0 && buffer[used - 1] <= _T(' '))
        --used;

    if (used > 0)
        str.Append(buffer, used);

    TRACE(_T("ReadToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
          m_LineNumber, CurrentChar(), PreviousChar(), NextChar());
    TRACE(_T("ReadToEOL(): %s"), str.wx_str());

    return str;
}