void Tokenizer::SkipToEndConditionPreprocessor() { do { wxChar ch = CurrentChar(); if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' ')) { while (SkipWhiteSpace() || SkipString() || SkipComment()) ; ch = CurrentChar(); } if (ch == _T('#')) { MoveToNextChar(); while (SkipWhiteSpace() || SkipComment()) ; const wxChar current = CurrentChar(); const wxChar next = NextChar(); // #if if (current == _T('i') && next == _T('f')) SkipToEndConditionPreprocessor(); // #endif else if (current == _T('e') && next == _T('n')) { SkipToEOL(false); break; } } } while (MoveToNextChar()); }
bool Tokenizer::ReplaceBufferForReparse(const wxString& target, bool updatePeekToken) { if (target.IsEmpty()) return false; if (m_IsReplaceParsing && ++m_RepeatReplaceCount > s_MaxRepeatReplaceCount) { m_TokenIndex = m_BufferLen - m_FirstRemainingLength; m_PeekAvailable = false; SkipToEOL(false); return false; } // Keep all in one line wxString buffer(target); for (size_t i = 0; i < buffer.Len(); ++i) { switch ((wxChar)buffer.GetChar(i)) { case _T('\\'): case _T('\r'): case _T('\n'): buffer.SetChar(i, _T(' ')); } } // Increase memory const size_t bufferLen = buffer.Len(); if (m_TokenIndex < bufferLen) { const size_t diffLen = bufferLen - m_TokenIndex; m_Buffer.insert(0, wxString(_T(' '), diffLen)); m_BufferLen += diffLen; m_TokenIndex += diffLen; } // Set replace parsing state, and save first replace token index if (!m_IsReplaceParsing) { m_FirstRemainingLength = m_BufferLen - m_TokenIndex; m_IsReplaceParsing = true; } // Replacement back wxChar* p = const_cast<wxChar*>((const wxChar*)m_Buffer) + m_TokenIndex - bufferLen; TRACE(_T("ReplacetargetForReparse() : <FROM>%s<TO>%s"), wxString(p, bufferLen).wx_str(), buffer.wx_str()); memcpy(p, (const wxChar*)target, bufferLen * sizeof(wxChar)); // Fix token index m_TokenIndex -= bufferLen; // Update the peek token if (m_PeekAvailable && updatePeekToken) { m_PeekAvailable = false; PeekToken(); } return true; }
bool Tokenizer::IsMacroDefined() { while (SkipWhiteSpace() || SkipComment()) ; int id = m_TokensTree->TokenExists(DoGetToken(), -1, tkPreprocessor); SkipToEOL(false); return (id != -1); }
/** Skip whole lines that start with '#' or '!', and - when the source form is
 *  fsfFixed and the current column is 1 - lines that start with 'c', 'C' or '*'.
 *  After each skipped line the following whitespace is skipped as well.
 *  Returns as soon as the current line is not such a line or EOF is reached.
 */
void TextCutter::SkipUnwanted()
{
    while (!IsEOF())
    {
        bool skipLine = (CurrentChar() == '#') || (CurrentChar() == '!');

        if (!skipLine && m_CurColumn == 1 && m_CurSourceForm == fsfFixed)
        {
            skipLine =    CurrentChar() == 'c'
                       || CurrentChar() == 'C'
                       || CurrentChar() == '*';
        }

        if (!skipLine)
            return;

        SkipToEOL();
        SkipWhiteSpace();
    }
}
// expect we are not in a C-string. bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting, bool skipPreprocessor, bool skipAngleBrace) { while (NotEOF() && !CharInString(CurrentChar(), chars)) { MoveToNextChar(); while (SkipString() || SkipComment()) ; // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar) // must skip <foo> and immediately after (bar)) // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird // parsing results bool done = false; while (supportNesting && !done) { switch (CurrentChar()) { case '#': if (skipPreprocessor) SkipToEOL(true); else done = true; break; case '{': SkipBlock('{'); break; case '(': SkipBlock('('); break; case '[': SkipBlock('['); break; case '<': // don't skip if << operator if (skipAngleBrace) { if (NextChar() == '<') MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<'); else SkipBlock('<'); break; } default: done = true; break; } } } return NotEOF(); }
bool Tokenizer::SkipComment(bool skipWhiteAtEnd) // = true { // C/C++ style comments bool is_comment = CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*'); if (!is_comment) return true; bool cstyle = NextChar() == '*'; MoveToNextChar(2); while (1) { if (!cstyle) { if (!SkipToEOL(false, true)) return false; MoveToNextChar(); break; } else { if (SkipToChar('/')) { if (PreviousChar() == '*') { MoveToNextChar(); break; } MoveToNextChar(); } else return false; } } if (IsEOF()) return false; if (skipWhiteAtEnd && !SkipWhiteSpace()) return false; return CurrentChar() == '/' ? SkipComment() : true; // handle chained comments }
/** Collect the tokens from the current position up to the end of the line.
 *
 *  Tokens inside a balanced "( ... )" group are concatenated and appended to
 *  'tokens' as one entry; every other token is appended on its own. Tokens
 *  that start with a character <= ' ' and lone backslashes are ignored.
 *  If the line ends inside an unbalanced group, the buffered tokens are
 *  appended individually.
 *
 *  @param tokens Receives the collected tokens (appended, not cleared).
 */
void Tokenizer::ReadToEOL(wxArrayString& tokens)
{
    // need to force the tokenizer skip raw expression
    const TokenizerState oldState = m_State;
    m_State = tsReadRawExpression;

    // Find out where the line ends, then rewind and tokenize up to that point.
    const unsigned int undoIndex = m_TokenIndex;
    const unsigned int undoLine = m_LineNumber;
    SkipToEOL(false);
    const unsigned int lastBufferLen = m_BufferLen - m_TokenIndex;
    m_TokenIndex = undoIndex;
    m_LineNumber = undoLine;

    int level = 0;       // current parenthesis nesting depth
    wxArrayString tmp;   // tokens of the parenthesised group being collected

    while (m_BufferLen - m_TokenIndex > lastBufferLen)
    {
        while (SkipComment())
            ;
        wxString token = DoGetToken();
        if (token[0] <= _T(' ') || token == _T("\\"))
            continue;

        if (token[0] == _T('('))
            ++level;

        if (level == 0)
        {
            if (!tmp.IsEmpty())
            {
                // a group was completed by an earlier token: emit it as one entry
                wxString blockStr;
                for (size_t i = 0; i < tmp.GetCount(); ++i)
                    blockStr << tmp[i];
                tokens.Add(blockStr.Trim());
                tmp.Clear();
            }

            // The current token is outside any group and must be kept too;
            // previously it was lost whenever it triggered the flush above.
            if (!token.Trim().IsEmpty())
                tokens.Add(token);
        }
        else
            tmp.Add(token);

        if (token[0] == _T(')'))
            --level;
    }

    if (!tmp.IsEmpty())
    {
        if (level == 0)
        {
            wxString blockStr;
            for (size_t i = 0; i < tmp.GetCount(); ++i)
                blockStr << tmp[i];
            tokens.Add(blockStr.Trim());
        }
        else
        {
            // unbalanced parentheses: fall back to individual tokens
            for (size_t i = 0; i < tmp.GetCount(); ++i)
            {
                if (!tmp[i].Trim().IsEmpty())
                    tokens.Add(tmp[i]);
            }
        }
    }

    m_State = oldState;
}
/** Read the text from the current position to the end of the line.
 *
 *  @param nestBraces    If true, '{' and '}' met on the way adjust m_NestLevel
 *                       (directly in the stripping path, via SkipToEOL otherwise).
 *  @param stripUnneeded If true, comments are skipped, leading and trailing
 *                       characters <= ' ' are dropped, a character <= ' ' that
 *                       repeats the previously stored character is dropped, and
 *                       lines ending in a backslash are joined.
 *                       If false, the raw buffer text up to where SkipToEOL()
 *                       stops is returned.
 *  @return The collected text.
 */
wxString Tokenizer::ReadToEOL(bool nestBraces, bool stripUnneeded)
{
    if (stripUnneeded)
    {
        TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"),
              wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
              PreviousChar(), NextChar(), nestBraces ? 1 : 0);

        // Characters are gathered in a fixed-size scratch buffer and moved into
        // 'str' whenever the write pointer reaches maxBufferLen.
        static const size_t maxBufferLen = 4094;
        wxChar buffer[maxBufferLen + 2];
        wxChar* p = buffer; // next write position in 'buffer'
        wxString str;

        for (;;)
        {
            // copy one physical line
            while (NotEOF() && CurrentChar() != _T('\n'))
            {
                while (SkipComment())
                    ;

                const wxChar ch = CurrentChar();
                if (ch == _T('\n'))
                    break;

                // drop whitespace/control chars at the very start of the scratch
                // buffer and ones that repeat the character stored just before
                if (ch <= _T(' ') && (p == buffer || *(p - 1) == ch))
                {
                    MoveToNextChar();
                    continue;
                }

                *p = ch;
                ++p;

                // scratch buffer full: flush it into the result string
                if (p >= buffer + maxBufferLen)
                {
                    str.Append(buffer, p - buffer);
                    p = buffer;
                }

                if (nestBraces)
                {
                    if (ch == _T('{'))
                        ++m_NestLevel;
                    else if (ch == _T('}'))
                        --m_NestLevel;
                }

                MoveToNextChar();
            }

            // stop unless the line is continued
            if (!IsBackslashBeforeEOL() || IsEOF())
                break;
            else
            {
                // Step the write pointer back over stored characters <= ' ' and
                // onto the first other character, which the next write overwrites.
                // NOTE(review): presumably that character is the continuation
                // backslash - confirm against IsBackslashBeforeEOL().
                while (p > buffer && *(--p) <= _T(' '))
                    ;
                MoveToNextChar();
            }
        }

        // trim trailing whitespace/control chars left in the scratch buffer
        while (p > buffer && *(p - 1) <= _T(' '))
            --p;

        if (p > buffer)
            str.Append(buffer, p - buffer);

        TRACE(_T("ReadToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
              m_LineNumber, CurrentChar(), PreviousChar(), NextChar());
        TRACE(_T("ReadToEOL(): %s"), str.wx_str());

        return str;
    }
    else
    {
        // raw mode: return the buffer slice that SkipToEOL() moved over
        const unsigned int idx = m_TokenIndex;
        SkipToEOL(nestBraces);
        return m_Buffer.Mid(idx, m_TokenIndex - idx);
    }
}
void Tokenizer::HandleConditionPreprocessor(const PreprocessorType type) { switch (type) { case ptIf: { TRACE(_T("HandleConditionPreprocessor() : #if at line = %d"), m_LineNumber); bool result; if (m_TokenizerOptions.wantPreprocessor) result = CalcConditionExpression(); else { SkipToEOL(false); result = true; } m_ExpressionResult.push(result); if (!result) SkipToNextConditionPreprocessor(); } break; case ptIfdef: { TRACE(_T("HandleConditionPreprocessor() : #ifdef at line = %d"), m_LineNumber); bool result; if (m_TokenizerOptions.wantPreprocessor) result = IsMacroDefined(); else { SkipToEOL(false); result = true; } m_ExpressionResult.push(result); if (!result) SkipToNextConditionPreprocessor(); } break; case ptIfndef: { TRACE(_T("HandleConditionPreprocessor() : #ifndef at line = %d"), m_LineNumber); bool result; if (m_TokenizerOptions.wantPreprocessor) result = !IsMacroDefined(); else { SkipToEOL(false); result = true; } m_ExpressionResult.push(result); if (!result) SkipToNextConditionPreprocessor(); } break; case ptElif: { TRACE(_T("HandleConditionPreprocessor() : #elif at line = %d"), m_LineNumber); bool result = false; if (!m_ExpressionResult.empty() && !m_ExpressionResult.top()) result = CalcConditionExpression(); if (result) m_ExpressionResult.top() = true; else SkipToNextConditionPreprocessor(); } break; case ptElifdef: { TRACE(_T("HandleConditionPreprocessor() : #elifdef at line = %d"), m_LineNumber); bool result = false; if (!m_ExpressionResult.empty() && !m_ExpressionResult.top()) result = IsMacroDefined(); if (result) m_ExpressionResult.top() = true; else SkipToNextConditionPreprocessor(); } break; case ptElifndef: { TRACE(_T("HandleConditionPreprocessor() : #elifndef at line = %d"), m_LineNumber); bool result = false; if (!m_ExpressionResult.empty() && !m_ExpressionResult.top()) result = !IsMacroDefined(); if (result) m_ExpressionResult.top() = true; else SkipToNextConditionPreprocessor(); } break; case ptElse: { TRACE(_T("HandleConditionPreprocessor() : 
#else at line = %d"), m_LineNumber); if (!m_ExpressionResult.empty() && !m_ExpressionResult.top()) SkipToEOL(false); else SkipToEndConditionPreprocessor(); } break; case ptEndif: { TRACE(_T("HandleConditionPreprocessor() : #endif at line = %d"), m_LineNumber); SkipToEOL(false); if (!m_ExpressionResult.empty()) m_ExpressionResult.pop(); } break; case ptOthers: break; } }
bool Tokenizer::CalcConditionExpression() { // need to force the tokenizer skip raw expression const TokenizerState oldState = m_State; m_State = tsReadRawExpression; const unsigned int undoIndex = m_TokenIndex; const unsigned int undoLine = m_LineNumber; SkipToEOL(false); const unsigned int lastBufferLen = m_BufferLen - m_TokenIndex; m_TokenIndex = undoIndex; m_LineNumber = undoLine; Expression exp; while (m_BufferLen - m_TokenIndex > lastBufferLen) { while (SkipComment()) ; wxString token = DoGetToken(); if (token[0] <= _T(' ') || token == _T("defined") || token == _T("\\")) continue; if (token.Len() > 1 && !wxIsdigit(token[0])) // handle macro { const int id = m_TokensTree->TokenExists(token, -1, tkPreprocessor); if (id != -1) { Token* tk = m_TokensTree->at(id); if (tk) { if (tk->m_Type.IsEmpty() || tk->m_Type == token) { if (tk->m_Args.IsEmpty()) { exp.AddToInfixExpression(_T("1")); continue; } else { if (ReplaceBufferForReparse(tk->m_Args, false)) continue; } } else if (!tk->m_Args.IsEmpty()) { if (ReplaceMacroActualContext(tk, false)) continue; } else if (wxIsdigit(tk->m_Type[0])) token = tk->m_Type; else if (tk->m_Type != tk->m_Name) { if (ReplaceBufferForReparse(tk->m_Type, false)) continue; } } } else { exp.AddToInfixExpression(_T("0")); continue; } } // only remaining number now if (!token.StartsWith(_T("0x"))) exp.AddToInfixExpression(token); else { long value; if (token.ToLong(&value, 16)) exp.AddToInfixExpression(wxString::Format(_T("%ld"), value)); else exp.AddToInfixExpression(_T("0")); } } // reset tokenizer's functionality m_State = oldState; exp.ConvertInfixToPostfix(); if (exp.CalcPostfix()) { TRACE(_T("CalcConditionExpression() : exp.GetStatus() : %d, exp.GetResult() : %d"), exp.GetStatus(), exp.GetResult()); return exp.GetStatus() && exp.GetResult(); } return true; }
//vfc add bGetValue wxString Tokenizer::DoGetToken(bool bGetValue, bool bTemplate) { if (IsEOF()) return wxEmptyString; if (!SkipWhiteSpace()) return wxEmptyString; if (m_SkipUnwantedTokens && !SkipUnwanted(bGetValue)) return wxEmptyString; // if m_SkipUnwantedTokens is false, we need to handle comments here too if (!m_SkipUnwantedTokens) SkipComment(); int start = m_TokenIndex; wxString m_Str; wxChar c = CurrentChar(); if (c == '_' || wxIsalpha(c)) { // keywords, identifiers, etc. // operator== is cheaper than wxIsalnum, also MoveToNextChar already includes IsEOF while ( ( CurrentChar() == '_' || wxIsalnum(CurrentChar()) ) && MoveToNextChar() ) ; if (IsEOF()) return wxEmptyString; m_Str = m_Buffer.Mid(start, m_TokenIndex - start); m_IsOperator = m_Str.IsSameAs(TokenizerConsts::operator_str); } #ifdef __WXMSW__ // This is a Windows only bug! else if (c == 178 || c == 179 || c == 185) // fetch ?and ? { m_Str = c; MoveToNextChar(); } #endif else if (wxIsdigit(CurrentChar())) { // numbers while (NotEOF() && CharInString(CurrentChar(), _T("0123456789.abcdefABCDEFXxLl"))) MoveToNextChar(); if (IsEOF()) return wxEmptyString; m_Str = m_Buffer.Mid(start, m_TokenIndex - start); m_IsOperator = false; } else if (CurrentChar() == '"' || CurrentChar() == '\'') { // string, char, etc. wxChar match = CurrentChar(); MoveToNextChar(); // skip starting ' or " if (!SkipToChar(match)) return wxEmptyString; MoveToNextChar(); // skip ending ' or " m_Str = m_Buffer.Mid(start, m_TokenIndex - start); } else if (CurrentChar() == ':') { if (NextChar() == ':') { MoveToNextChar(); MoveToNextChar(); m_Str.assign(TokenizerConsts::colon_colon); // this only copies a pointer, but operator= allocates memory and does a memcpy! 
} else { MoveToNextChar(); m_Str.assign(TokenizerConsts::colon); } } else if (CurrentChar() == '<' && bTemplate) { wxChar match = _T('>'); MoveToNextChar(); if (!SkipToOneOfChars(_T(">\r\n")),false) return wxEmptyString; MoveToNextChar(); wxString tmp = m_Buffer.Mid(start+1,m_TokenIndex-start-2); tmp.Trim(); m_Str = _T("<"); m_Str += tmp; m_Str += _T(">");//m_Buffer.Mid(start, m_TokenIndex - start); } else if (CurrentChar() == '(') { m_IsOperator = false; // skip blocks () [] if (!SkipBlock(CurrentChar())) return wxEmptyString; wxString tmp = m_Buffer.Mid(start, m_TokenIndex - start); // tmp.Replace(_T("\t"), _T(" ")); // replace tabs with spaces // tmp.Replace(_T("\n"), _T(" ")); // replace LF with spaces // tmp.Replace(_T("\r"), _T(" ")); // replace CR with spaces { // this is much faster: size_t i; while((i = tmp.find_first_of(TokenizerConsts::tabcrlf)) != wxString::npos) //tmp[i] = _T(' '); tmp.SetAt(i,_T(' ')); } // fix-up arguments (remove excessive spaces/tabs/newlines) for (unsigned int i = 0; i < tmp.Length() - 1; ++i) { //skip spaces before '=' and ',' if (tmp.GetChar(i) == ' ' && (tmp.GetChar(i + 1) == ',' || tmp.GetChar(i + 1) == '=')) continue; if (tmp.GetChar(i) == '/' && tmp.GetChar(i + 1) == '*') { // skip C comments i += 2; while (i < tmp.Length() - 1) { if (tmp.GetChar(i) == '*' && tmp.GetChar(i + 1) == '/') break; ++i; } if (i >= tmp.Length() - 1 || tmp.GetChar(i + 1) != '/') continue; // we failed... 
i += 2; } else if (tmp.GetChar(i) == '=') { // skip default assignments ++i; int level = 0; // nesting parenthesis while (i < tmp.Length()) { if (tmp.GetChar(i) == '(') ++level; else if (tmp.GetChar(i) == ')') --level; if ((tmp.GetChar(i) == ',' && level == 0) || (tmp.GetChar(i) == ')' && level < 0)) break; ++i; } if (i < tmp.Length() && tmp.GetChar(i) == ',') --i; continue; // we are done here } if (i < tmp.Length() - 1) { if ((tmp.GetChar(i) == ' ') && (tmp.GetChar(i + 1) == ' ')) continue; // skip excessive spaces // in case of c-style comments "i" might already be tmp.Length() // thus do only add the current char otherwise. // otherwise the following statement: // m_Str << _T(')'); // below would add another closing bracket. m_Str << tmp.GetChar(i); } } m_Str << _T(')'); // add closing parenthesis (see "i < tmp.Length() - 1" in previous "for") // m_Str.Replace(_T(" "), _T(" ")); // replace two-spaces with single-space (introduced if it skipped comments or assignments) // m_Str.Replace(_T("( "), _T("(")); // m_Str.Replace(_T(" )"), _T(")")); //Str.Replace is massive overkill here since it has to allocate one new block per replacement CompactSpaces(m_Str); } else { if (CurrentChar() == '{') ++m_NestLevel; else if (CurrentChar() == '}') --m_NestLevel; m_Str = CurrentChar(); MoveToNextChar(); } if (m_LastWasPreprocessor && !m_Str.IsSameAs(_T("#")) && !m_LastPreprocessor.IsSameAs(_T("#"))) { if (!m_LastPreprocessor.IsSameAs(TokenizerConsts::include_str)) { // except for #include and #if[[n]def], all other preprocessor directives need only // one word exactly after the directive, e.g. #define THIS_WORD SkipToEOL(); } m_LastPreprocessor.Clear(); } if (m_LastWasPreprocessor) m_LastPreprocessor << m_Str; m_LastWasPreprocessor = false; return m_Str; }
//vfc add bGetValue bool Tokenizer::SkipUnwanted(bool bGetValue) { while (CurrentChar() == '#' || (!m_IsOperator && CurrentChar() == '=') || (!m_IsOperator && CurrentChar() == '[') || CurrentChar() == '?' || (CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*') )) { bool skipPreprocessor = false; // used for #include while (m_Buffer.Mid(m_TokenIndex, 2) == _T("//") || m_Buffer.Mid(m_TokenIndex, 2) == _T("/*")) { // C/C++ style comments SkipComment(); if (IsEOF()) return false; if (!SkipWhiteSpace()) return false; } while (CurrentChar() == '#') { // preprocessor directives // we only care for #include and #define, for now unsigned int backupIdx = m_TokenIndex; MoveToNextChar(); SkipWhiteSpace(); if ((CurrentChar() == 'i' && NextChar() == 'n') || // in(clude) (CurrentChar() == 'i' && NextChar() == 'f') || // if(|def|ndef) (CurrentChar() == 'e' && NextChar() == 'l') || // el(se|if) (CurrentChar() == 'e' && NextChar() == 'n') || // en(dif) (m_Options.wantPreprocessor && CurrentChar() == 'd' && NextChar() == 'e')) // de(fine) { // ok, we have something like #in(clude) m_LastWasPreprocessor = true; m_LastPreprocessor.Clear(); m_TokenIndex = backupIdx; // keep # skipPreprocessor = true; break; } else { // skip the rest for now... SkipToEOL(false); if (!SkipWhiteSpace()) return false; } if (skipPreprocessor) break; } while (CurrentChar() == '[') { // array subscripts // skip them for now... SkipBlock('['); if (!SkipWhiteSpace()) return false; } while (CurrentChar() == '=') { // skip assignments // TODO: what happens with operators? if (bGetValue == true) { MoveToNextChar(); SkipWhiteSpace(); return true; } else if (!SkipToOneOfChars(_T(",;}"), true)) return false; } while (CurrentChar() == '?') { // skip "condition ? true : false" // TODO: what happens with operators? if (!SkipToOneOfChars(_T(";}"))) return false; } if (skipPreprocessor) break; } return true; }
/** Return the raw buffer text from the current position up to where
 *  SkipToEOL() stops, and leave the tokenizer at that position.
 *
 *  @param nestBraces Forwarded to SkipToEOL().
 */
wxString Tokenizer::ReadToEOL(bool nestBraces)
{
    const unsigned int lineStart = m_TokenIndex;

    SkipToEOL(nestBraces);

    const unsigned int consumed = m_TokenIndex - lineStart;
    return m_Buffer.Mid(lineStart, consumed);
}