bool Tokenizer::SkipUnwanted() { while (SkipWhiteSpace() || SkipComment()) ; wxChar c = CurrentChar(); const unsigned int startIndex = m_TokenIndex; if (c == _T('#')) { const PreprocessorType type = GetPreprocessorType(); if (type != ptOthers) { HandleConditionPreprocessor(type); c = CurrentChar(); } } // skip [XXX][YYY] if (m_State & tsSkipSubScrip) { while (c == _T('[') ) { SkipBlock('['); SkipWhiteSpace(); if (IsEOF()) return false; c = CurrentChar(); } } // skip the following = or ? if (m_State & tsSkipEqual) { if (c == _T('=')) { if (!SkipToOneOfChars(_T(",;}"), true, true, false)) return false; } } else if (m_State & tsSkipQuestion) { if (c == _T('?')) { if (!SkipToOneOfChars(_T(";}"), false, true)) return false; } } // skip the following white space and comments while (SkipWhiteSpace() || SkipComment()) ; if (startIndex != m_TokenIndex && CurrentChar() == _T('#')) return SkipUnwanted(); return NotEOF(); }
//vfc add bGetValue wxString Tokenizer::DoGetToken(bool bGetValue, bool bTemplate) { if (IsEOF()) return wxEmptyString; if (!SkipWhiteSpace()) return wxEmptyString; if (m_SkipUnwantedTokens && !SkipUnwanted(bGetValue)) return wxEmptyString; // if m_SkipUnwantedTokens is false, we need to handle comments here too if (!m_SkipUnwantedTokens) SkipComment(); int start = m_TokenIndex; wxString m_Str; wxChar c = CurrentChar(); if (c == '_' || wxIsalpha(c)) { // keywords, identifiers, etc. // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF while ( ( CurrentChar() == '_' || wxIsalnum(CurrentChar()) ) && MoveToNextChar() ) ; if (IsEOF()) return wxEmptyString; m_Str = m_Buffer.Mid(start, m_TokenIndex - start); m_IsOperator = m_Str.IsSameAs(TokenizerConsts::operator_str); } #ifdef __WXMSW__ // This is a Windows only bug! else if (c == 178 || c == 179 || c == 185) // fetch ?and ? { m_Str = c; MoveToNextChar(); } #endif else if (wxIsdigit(CurrentChar())) { // numbers while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl"))) MoveToNextChar(); if (IsEOF()) return wxEmptyString; m_Str = m_Buffer.Mid(start, m_TokenIndex - start); m_IsOperator = false; } else if (CurrentChar() == '"' || CurrentChar() == '\'') { // string, char, etc. wxChar match = CurrentChar(); MoveToNextChar(); // skip starting ' or " if (!SkipToChar(match)) return wxEmptyString; MoveToNextChar(); // skip ending ' or " m_Str = m_Buffer.Mid(start, m_TokenIndex - start); } else if (CurrentChar() == ':') { if (NextChar() == ':') { MoveToNextChar(); MoveToNextChar(); m_Str.assign(TokenizerConsts::colon_colon); // this only copies a pointer, but operator= allocates memory and does a memcpy! 
} else { MoveToNextChar(); m_Str.assign(TokenizerConsts::colon); } } else if (CurrentChar() == '<' && bTemplate) { wxChar match = _T('>'); MoveToNextChar(); if (!SkipToOneOfChars(_T(">\r\n")),false) return wxEmptyString; MoveToNextChar(); wxString tmp = m_Buffer.Mid(start+1,m_TokenIndex-start-2); tmp.Trim(); m_Str = _T("<"); m_Str += tmp; m_Str += _T(">");//m_Buffer.Mid(start, m_TokenIndex - start); } else if (CurrentChar() == '(') { m_IsOperator = false; // skip blocks () [] if (!SkipBlock(CurrentChar())) return wxEmptyString; wxString tmp = m_Buffer.Mid(start, m_TokenIndex - start); // tmp.Replace(_T("\t"), _T(" ")); // replace tabs with spaces // tmp.Replace(_T("\n"), _T(" ")); // replace LF with spaces // tmp.Replace(_T("\r"), _T(" ")); // replace CR with spaces { // this is much faster: size_t i; while((i = tmp.find_first_of(TokenizerConsts::tabcrlf)) != wxString::npos) //tmp[i] = _T(' '); tmp.SetAt(i,_T(' ')); } // fix-up arguments (remove excessive spaces/tabs/newlines) for (unsigned int i = 0; i < tmp.Length() - 1; ++i) { //skip spaces before '=' and ',' if (tmp.GetChar(i) == ' ' && (tmp.GetChar(i + 1) == ',' || tmp.GetChar(i + 1) == '=')) continue; if (tmp.GetChar(i) == '/' && tmp.GetChar(i + 1) == '*') { // skip C comments i += 2; while (i < tmp.Length() - 1) { if (tmp.GetChar(i) == '*' && tmp.GetChar(i + 1) == '/') break; ++i; } if (i >= tmp.Length() - 1 || tmp.GetChar(i + 1) != '/') continue; // we failed... 
i += 2; } else if (tmp.GetChar(i) == '=') { // skip default assignments ++i; int level = 0; // nesting parenthesis while (i < tmp.Length()) { if (tmp.GetChar(i) == '(') ++level; else if (tmp.GetChar(i) == ')') --level; if ((tmp.GetChar(i) == ',' && level == 0) || (tmp.GetChar(i) == ')' && level < 0)) break; ++i; } if (i < tmp.Length() && tmp.GetChar(i) == ',') --i; continue; // we are done here } if (i < tmp.Length() - 1) { if ((tmp.GetChar(i) == ' ') && (tmp.GetChar(i + 1) == ' ')) continue; // skip excessive spaces // in case of c-style comments "i" might already be tmp.Length() // thus do only add the current char otherwise. // otherwise the following statement: // m_Str << _T(')'); // below would add another closing bracket. m_Str << tmp.GetChar(i); } } m_Str << _T(')'); // add closing parenthesis (see "i < tmp.Length() - 1" in previous "for") // m_Str.Replace(_T(" "), _T(" ")); // replace two-spaces with single-space (introduced if it skipped comments or assignments) // m_Str.Replace(_T("( "), _T("(")); // m_Str.Replace(_T(" )"), _T(")")); //Str.Replace is massive overkill here since it has to allocate one new block per replacement CompactSpaces(m_Str); } else { if (CurrentChar() == '{') ++m_NestLevel; else if (CurrentChar() == '}') --m_NestLevel; m_Str = CurrentChar(); MoveToNextChar(); } if (m_LastWasPreprocessor && !m_Str.IsSameAs(_T("#")) && !m_LastPreprocessor.IsSameAs(_T("#"))) { if (!m_LastPreprocessor.IsSameAs(TokenizerConsts::include_str)) { // except for #include and #if[[n]def], all other preprocessor directives need only // one word exactly after the directive, e.g. #define THIS_WORD SkipToEOL(); } m_LastPreprocessor.Clear(); } if (m_LastWasPreprocessor) m_LastPreprocessor << m_Str; m_LastWasPreprocessor = false; return m_Str; }
/** Reads the token that starts at the current buffer position and advances past it.
 *
 *  Token kinds recognised, in order:
 *    - identifiers / keywords (candidates for macro replacement),
 *    - on Windows builds, the single characters with codes 178, 179 and 185,
 *    - numbers,
 *    - string and character literals (consumed by SkipString()),
 *    - ":" and "::",
 *    - '<' : the whole "<...>" when tsSingleAngleBrace is set, otherwise just '<',
 *    - '(' : just '(' when tsReadRawExpression is set, otherwise the text produced by
 *            ReadParentheses(),
 *    - any other single character ('{' and '}' also adjust m_NestLevel).
 *
 *  @return the token text, or wxEmptyString when EOF is hit while reading an identifier or
 *          number, or when the closing '>' cannot be found.
 */
wxString Tokenizer::DoGetToken()
{
    const int start = m_TokenIndex;
    bool needReplace = false; // set only for identifier-like tokens

    wxString str;
    wxChar c = CurrentChar();

    if (c == '_' || wxIsalpha(c))
    {
        // keywords, identifiers, etc.
        // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF
        while (((c == '_') || (wxIsalnum(c))) && MoveToNextChar())
            c = CurrentChar(); // repeat

        if (IsEOF())
            return wxEmptyString;

        needReplace = true;
        str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
#ifdef __WXMSW__ // This is a Windows only bug!
    // fetch non-English characters, see more details in:
    // http://forums.codeblocks.org/index.php/topic,11387.0.html
    else if (c == 178 || c == 179 || c == 185)
    {
        str = c;
        MoveToNextChar();
    }
#endif
    else if (wxIsdigit(c))
    {
        // numbers
        while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl")))
            MoveToNextChar();

        if (IsEOF())
            return wxEmptyString;

        str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
    else if ((c == '"') || (c == '\''))
    {
        SkipString();
        // Now, we are after the end of the C-string, so return the whole string as a token.
        str = m_Buffer.Mid(start, m_TokenIndex - start);
    }
    else if (c == ':')
    {
        // assign() is used on purpose: per the original author it only copies a pointer,
        // whereas operator= allocates memory and does a memcpy
        if (NextChar() == ':')
        {
            MoveToNextChar();
            MoveToNextChar();
            str.assign(TokenizerConsts::colon_colon);
        }
        else
        {
            MoveToNextChar();
            str.assign(TokenizerConsts::colon);
        }
    }
    else if (c == '<')
    {
        if (m_State & tsSingleAngleBrace)
        {
            // take everything up to and including the matching '>' as one token
            if (!SkipToOneOfChars(_T(">"), true, true))
                return wxEmptyString;
            MoveToNextChar();
            str = m_Buffer.Mid(start, m_TokenIndex - start);
        }
        else
        {
            str = c;
            MoveToNextChar();
        }
    }
    else if (c == '(')
    {
        if (m_State & tsReadRawExpression)
        {
            str = c;
            MoveToNextChar();
        }
        else
        {
            ReadParentheses(str);
        }
    }
    else
    {
        if (c == '{')
            ++m_NestLevel;
        else if (c == '}')
            --m_NestLevel;

        str = c;
        MoveToNextChar();
    }

    // Once the read position has moved past the point recorded by m_FirstRemainingLength,
    // reset the replacement bookkeeping.
    if (m_FirstRemainingLength != 0 && m_BufferLen - m_FirstRemainingLength < m_TokenIndex)
    {
        m_FirstRemainingLength = 0;
        m_IsReplaceParsing = false;
        m_RepeatReplaceCount = 0;
    }

    // FIX: the original tested "m_State ^ tsReadRawExpression". XOR is only zero when m_State
    // equals exactly that one flag, so any combined state still triggered replacement.
    // Test the bit the same way the '(' branch above does.
    if (needReplace && !(m_State & tsReadRawExpression))
        MacroReplace(str);

    return str;
}
//vfc add bGetValue bool Tokenizer::SkipUnwanted(bool bGetValue) { while (CurrentChar() == '#' || (!m_IsOperator && CurrentChar() == '=') || (!m_IsOperator && CurrentChar() == '[') || CurrentChar() == '?' || (CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*') )) { bool skipPreprocessor = false; // used for #include while (m_Buffer.Mid(m_TokenIndex, 2) == _T("//") || m_Buffer.Mid(m_TokenIndex, 2) == _T("/*")) { // C/C++ style comments SkipComment(); if (IsEOF()) return false; if (!SkipWhiteSpace()) return false; } while (CurrentChar() == '#') { // preprocessor directives // we only care for #include and #define, for now unsigned int backupIdx = m_TokenIndex; MoveToNextChar(); SkipWhiteSpace(); if ((CurrentChar() == 'i' && NextChar() == 'n') || // in(clude) (CurrentChar() == 'i' && NextChar() == 'f') || // if(|def|ndef) (CurrentChar() == 'e' && NextChar() == 'l') || // el(se|if) (CurrentChar() == 'e' && NextChar() == 'n') || // en(dif) (m_Options.wantPreprocessor && CurrentChar() == 'd' && NextChar() == 'e')) // de(fine) { // ok, we have something like #in(clude) m_LastWasPreprocessor = true; m_LastPreprocessor.Clear(); m_TokenIndex = backupIdx; // keep # skipPreprocessor = true; break; } else { // skip the rest for now... SkipToEOL(false); if (!SkipWhiteSpace()) return false; } if (skipPreprocessor) break; } while (CurrentChar() == '[') { // array subscripts // skip them for now... SkipBlock('['); if (!SkipWhiteSpace()) return false; } while (CurrentChar() == '=') { // skip assignments // TODO: what happens with operators? if (bGetValue == true) { MoveToNextChar(); SkipWhiteSpace(); return true; } else if (!SkipToOneOfChars(_T(",;}"), true)) return false; } while (CurrentChar() == '?') { // skip "condition ? true : false" // TODO: what happens with operators? if (!SkipToOneOfChars(_T(";}"))) return false; } if (skipPreprocessor) break; } return true; }