bool Tokenizer::SkipBlock(const wxChar& ch) { // skip blocks () [] {} <> wxChar match; switch (ch) { case '(': match = ')'; break; case '[': match = ']'; break; case '{': match = '}'; break; case '<': match = '>'; break; default : return false; } MoveToNextChar(); int nestLevel = 1; // counter for nested blocks (xxx()) while (NotEOF()) { while (SkipWhiteSpace() || SkipString() || SkipComment()) ; if (CurrentChar() == ch) ++nestLevel; else if (CurrentChar() == match) --nestLevel; MoveToNextChar(); if (nestLevel == 0) break; } return NotEOF(); }
bool Tokenizer::SkipToEOL(bool nestBraces, bool skippingComment) { // skip everything until we find EOL while (1) { while (NotEOF() && CurrentChar() != '\n') { if (CurrentChar() == '/' && NextChar() == '*') { SkipComment(false); // don't skip whitespace after the comment if (skippingComment && CurrentChar() == '\n') { continue; // early exit from the loop } } if (nestBraces && CurrentChar() == _T('{')) ++m_NestLevel; else if (nestBraces && CurrentChar() == _T('}')) --m_NestLevel; MoveToNextChar(); } wxChar last = PreviousChar(); // if DOS line endings, we 've hit \r and we skip to \n... if (last == '\r') last = m_Buffer.GetChar(m_TokenIndex - 2); if (IsEOF() || last != '\\') break; else MoveToNextChar(); } if (IsEOF()) return false; return true; }
void Tokenizer::SkipToEndConditionPreprocessor() { do { wxChar ch = CurrentChar(); if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' ')) { while (SkipWhiteSpace() || SkipString() || SkipComment()) ; ch = CurrentChar(); } if (ch == _T('#')) { MoveToNextChar(); while (SkipWhiteSpace() || SkipComment()) ; const wxChar current = CurrentChar(); const wxChar next = NextChar(); // #if if (current == _T('i') && next == _T('f')) SkipToEndConditionPreprocessor(); // #endif else if (current == _T('e') && next == _T('n')) { SkipToEOL(false); break; } } } while (MoveToNextChar()); }
bool Tokenizer::SkipUnwanted() { while (SkipWhiteSpace() || SkipComment()) ; wxChar c = CurrentChar(); const unsigned int startIndex = m_TokenIndex; if (c == _T('#')) { const PreprocessorType type = GetPreprocessorType(); if (type != ptOthers) { HandleConditionPreprocessor(type); c = CurrentChar(); } } // skip [XXX][YYY] if (m_State & tsSkipSubScrip) { while (c == _T('[') ) { SkipBlock('['); SkipWhiteSpace(); if (IsEOF()) return false; c = CurrentChar(); } } // skip the following = or ? if (m_State & tsSkipEqual) { if (c == _T('=')) { if (!SkipToOneOfChars(_T(",;}"), true, true, false)) return false; } } else if (m_State & tsSkipQuestion) { if (c == _T('?')) { if (!SkipToOneOfChars(_T(";}"), false, true)) return false; } } // skip the following white space and comments while (SkipWhiteSpace() || SkipComment()) ; if (startIndex != m_TokenIndex && CurrentChar() == _T('#')) return SkipUnwanted(); return NotEOF(); }
void TextCutter::SkipToChar(const wxChar& ch) { // skip everything until we find ch while (1) { while (!IsEOF() && CurrentChar() != ch && CurrentChar() != '\n') MoveToNextChar(); break; } }
// Behavior consistent with SkipComment bool Tokenizer::SkipWhiteSpace() { if (CurrentChar() > _T(' ') || IsEOF()) return false; // skip spaces, tabs, etc. // don't check EOF when MoveToNextChar already does, also replace isspace() which calls msvcrt.dll // with a dirty hack: CurrentChar() <= ' ' is "good enough" here while (CurrentChar() <= _T(' ') && MoveToNextChar()) ; return true; }
// Skips whole lines that start at the cursor with '#' or '!', and -- when the
// source form is fsfFixed and the cursor is in column 1 -- lines that start
// with 'c', 'C' or '*'. After each skipped line the following whitespace is
// skipped too, and the test is repeated until a line that does not qualify
// (or EOF) is reached.
void TextCutter::SkipUnwanted()
{
    while (!IsEOF())
    {
        bool skipLine = (CurrentChar() == '#' || CurrentChar() == '!');

        if (!skipLine)
        {
            const bool columnOneMarker =
                CurrentChar() == 'c' || CurrentChar() == 'C' || CurrentChar() == '*';
            skipLine = columnOneMarker && m_CurColumn == 1 && m_CurSourceForm == fsfFixed;
        }

        if (!skipLine)
            return;

        SkipToEOL();
        SkipWhiteSpace();
    }
}
static int simple_fence(int sdir, int ch, int ofence) { int count = 1; /* Assmue that we're sitting at one end of the fence */ int c; /* scan for fence */ while (InDirection(sdir) && !interrupted()) { c = CurrentChar(); if (c == ch) { ++count; } else if (c == ofence) { if (--count <= 0) break; } } /* if count is zero, we have a match, move the sucker */ if (count <= 0) { if (!doingopcmd || doingsweep) sweephack = TRUE; else if (sdir == FORWARD) forwchar(TRUE,1); curwp->w_flag |= WFMOVE; return TRUE; } return FALSE; }
static int comment_fence(int sdir) { MARK comstartpos; int found = FALSE; int s = FALSE; int first = TRUE; comstartpos.l = null_ptr; while (!found) { if (!first && CurrentChar() == '/') { /* is it a comment-end? */ if (PrevCharIs('*')) { if (sdir == FORWARD) { found = TRUE; break; } else if (comstartpos.l != null_ptr) { DOT = comstartpos; found = TRUE; break; } else { return FALSE; } } /* is it a comment start? */ if (sdir == REVERSE && NextCharIs('*')) { /* remember where we are */ comstartpos = DOT; } } s = InDirection(sdir); if (s == FALSE) { if (comstartpos.l != null_ptr) { DOT = comstartpos; found = TRUE; break; } return FALSE; } if (interrupted()) return FALSE; first = FALSE; } /* if found, move the sucker */ if (found && !first) { if (!doingopcmd || doingsweep) sweephack = TRUE; else if (sdir == FORWARD) forwchar(TRUE,1); curwp->w_flag |= WFMOVE; return TRUE; } return FALSE; }
// expect we are not in a C-string bool Tokenizer::SkipToChar(const wxChar& ch) { // skip everything until we find ch while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does ; return NotEOF(); }
// Skips characters for which isspace() is true.
//
// Returns false only when already at EOF on entry; otherwise returns true,
// whether or not any character was actually skipped.
bool TextCutter::SkipWhiteSpace()
{
    if (IsEOF())
        return false;

    do
    {
        if (!isspace(CurrentChar()))
            break;
        MoveToNextChar();
    }
    while (!IsEOF());

    return true;
}
// expect we are not in a C-string. bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting, bool skipPreprocessor, bool skipAngleBrace) { while (NotEOF() && !CharInString(CurrentChar(), chars)) { MoveToNextChar(); while (SkipString() || SkipComment()) ; // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar) // must skip <foo> and immediately after (bar)) // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird // parsing results bool done = false; while (supportNesting && !done) { switch (CurrentChar()) { case '#': if (skipPreprocessor) SkipToEOL(true); else done = true; break; case '{': SkipBlock('{'); break; case '(': SkipBlock('('); break; case '[': SkipBlock('['); break; case '<': // don't skip if << operator if (skipAngleBrace) { if (NextChar() == '<') MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<'); else SkipBlock('<'); break; } default: done = true; break; } } } return NotEOF(); }
// Extracts the next chunk of text starting at the cursor.
//
// chunk   receives the extracted text (empty when already at EOF).
// isWord  set to true only when the chunk is an identifier-like word
//         (starts with a letter or '_', continues with letters, digits, '_').
//
// When the cursor is not on a word, leading whitespace and unwanted lines are
// skipped first; if that moved the cursor, the skipped text itself is the
// chunk. Otherwise the chunk is a number, a quoted string/char, or a single
// character.
void TextCutter::GetChunk(wxString& chunk, bool& isWord)
{
    isWord = false;
    chunk = wxEmptyString;
    if (IsEOF())
        return;

    const unsigned int begin = m_CurIdx;

    if (isalpha(CurrentChar()) || CurrentChar() == '_')
    {
        while (!IsEOF() && (isalnum(CurrentChar()) || CurrentChar() == '_'))
            MoveToNextChar();
        chunk = m_Text.Mid(begin, m_CurIdx - begin);
        isWord = true;
        return;
    }

    SkipWhiteSpace();
    SkipUnwanted();

    // Only consume a lexeme when the skipping above did not move the cursor;
    // otherwise the skipped span is returned as the chunk.
    if (begin == m_CurIdx)
    {
        if (isdigit(CurrentChar()))
        {
            // numbers
            while (!IsEOF() && CharInString(CurrentChar(), "0123456789.abcdefABCDEFXxLl"))
                MoveToNextChar();
        }
        else if (CurrentChar() == '"' || CurrentChar() == '\'')
        {
            // string, char, etc.
            wxChar quote = CurrentChar();
            MoveToNextChar(); // skip starting ' or "
            SkipToChar(quote);
            MoveToNextChar(); // skip ending ' or "
        }
        else
        {
            MoveToNextChar();
        }
    }

    chunk = m_Text.Mid(begin, m_CurIdx - begin);
}
bool Tokenizer::SkipToInlineCommentEnd() { TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"), wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(), PreviousChar(), NextChar()); // skip everything until we find EOL while (true) { SkipToChar(_T('\n')); if (!IsBackslashBeforeEOL() || IsEOF()) break; else MoveToNextChar(); } TRACE(_T("SkipToInlineCommentEnd(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c',") _T(" NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar()); return NotEOF(); }
/* Close fences are matched against their partners, and if on screen the cursor briefly lights there */ void fmatch(int rch) { MARK oldpos; /* original position */ register LINE *toplp; /* top line in current window */ register int count; /* current fence level count */ register char c; /* current character in scan */ int dir, lch; int backcharfailed = FALSE; /* get the matching left-fence char, if it exists */ lch = is_user_fence(rch, &dir); if (lch == 0 || dir != REVERSE) return; /* first get the display update out there */ (void)update(FALSE); /* save the original cursor position */ oldpos = DOT; /* find the top line and set up for scan */ toplp = lback(curwp->w_line.l); count = 1; backchar(TRUE, 2); /* scan back until we find it, or reach past the top of the window */ while (count > 0 && DOT.l != toplp) { c = CurrentChar(); if (c == rch) ++count; if (c == lch) --count; if (backchar(FALSE, 1) != TRUE) { backcharfailed = TRUE; break; } } /* if count is zero, we have a match, display the sucker */ if (count == 0) { if (!backcharfailed) forwchar(FALSE, 1); if (update(FALSE) == TRUE) /* the idea is to leave the cursor there for about a quarter of a second */ catnap(300, FALSE); } /* restore the current position */ DOT = oldpos; }
bool Tokenizer::SkipComment(bool skipWhiteAtEnd) // = true { // C/C++ style comments bool is_comment = CurrentChar() == '/' && (NextChar() == '/' || NextChar() == '*'); if (!is_comment) return true; bool cstyle = NextChar() == '*'; MoveToNextChar(2); while (1) { if (!cstyle) { if (!SkipToEOL(false, true)) return false; MoveToNextChar(); break; } else { if (SkipToChar('/')) { if (PreviousChar() == '*') { MoveToNextChar(); break; } MoveToNextChar(); } else return false; } } if (IsEOF()) return false; if (skipWhiteAtEnd && !SkipWhiteSpace()) return false; return CurrentChar() == '/' ? SkipComment() : true; // handle chained comments }
void Tokenizer::SkipToNextConditionPreprocessor() { do { wxChar ch = CurrentChar(); if (ch == _T('\'') || ch == _T('"') || ch == _T('/') || ch <= _T(' ')) { while (SkipWhiteSpace() || SkipString() || SkipComment()) ; ch = CurrentChar(); } if (ch == _T('#')) { const unsigned int undoIndex = m_TokenIndex; const unsigned int undoLine = m_LineNumber; MoveToNextChar(); while (SkipWhiteSpace() || SkipComment()) ; const wxChar current = CurrentChar(); const wxChar next = NextChar(); // #if if (current == _T('i') && next == _T('f')) SkipToEndConditionPreprocessor(); // #else #elif #elifdef #elifndef #endif else if (current == _T('e') && (next == _T('l') || next == _T('n'))) { m_TokenIndex = undoIndex; m_LineNumber = undoLine; break; } } } while (MoveToNextChar()); }
bool Tokenizer::SkipToOneOfChars(const wxChar* chars, bool supportNesting) { // skip everything until we find any one of chars while (1) { while (NotEOF() && !CharInString(CurrentChar(), chars)) { if (CurrentChar() == '"' || CurrentChar() == '\'') { // this is the case that match is inside a string! wxChar ch = CurrentChar(); MoveToNextChar(); SkipToChar(ch); } MoveToNextChar(); // make sure we skip comments if (CurrentChar() == '/') SkipComment(); // this will decide if it is a comment // use 'while' here to cater for consecutive blocks to skip (e.g. sometemplate<foo>(bar) // must skip <foo> and immediately after (bar)) // because if we don't, the next block won't be skipped ((bar) in the example) leading to weird // parsing results bool done = false; while (supportNesting && !done) { switch (CurrentChar()) { case '{': SkipBlock('{'); break; case '(': SkipBlock('('); break; case '[': SkipBlock('['); break; case '<': // don't skip if << operator if (NextChar() == '<') MoveToNextChar(2); // skip it and also the next '<' or the next '<' leads to a SkipBlock('<'); else SkipBlock('<'); break; default: done = true; break; } } } if (PreviousChar() != '\\') break; else { // check for "\\" if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\') break; } MoveToNextChar(); } if (IsEOF()) return false; return true; }
bool Tokenizer::SkipBlock(const wxChar& ch) { // skip blocks () [] {} <> wxChar match; switch (ch) { case '(': match = ')'; break; case '[': match = ']'; break; case '{': match = '}'; break; case '<': match = '>'; break; default : return false; } MoveToNextChar(); int count = 1; // counter for nested blocks (xxx()) while (NotEOF()) { bool noMove = false; if (CurrentChar() == '/') SkipComment(); // this will decide if it is a comment if (CurrentChar() == '"' || CurrentChar() == '\'') { // this is the case that match is inside a string! wxChar ch = CurrentChar(); MoveToNextChar(); SkipToChar(ch); MoveToNextChar(); // don't move to next char below if concatenating strings (e.g. printf("" "")) if (CurrentChar() == '"' || CurrentChar() == '\'') noMove = true; } if (CurrentChar() == ch) ++count; else if (CurrentChar() == match) --count; if (!noMove) MoveToNextChar(); if (count == 0) break; } if (IsEOF()) return false; return true; }
// Reads a parenthesised expression into str, optionally trimming what
// precedes it.
//
// str        cleared first; receives the text read by ReadParentheses(str).
// trimFirst  when true, whitespace and comments in front of the '(' are
//            skipped, and nothing is read (str stays empty) if the next
//            character is then not '('.
//
// e.g. #define AAA /*args*/ (x) x
// we want to read "(x)", so the unwanted text before "(x)" must be trimmed.
void Tokenizer::ReadParentheses(wxString& str, bool trimFirst)
{
    str.Clear();

    if (trimFirst)
    {
        // Keep skipping as long as EITHER whitespace or a comment was
        // consumed. (Requiring both at once would stop at a leading comment
        // and after the first comment of a run.)
        while (SkipWhiteSpace() || SkipComment())
            ;

        if (CurrentChar() != _T('('))
            return;
    }

    ReadParentheses(str);
}
// For example: X"ABCDEFG\"HIJKLMN"Y // We are now at A, and would skip to Y // The double quote before H is a "C-escaped-character", We shouldn't quite from that bool Tokenizer::SkipToStringEnd(const wxChar& ch) { while (true) { while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does ; if (IsEOF()) return false; if (IsEscapedChar()) break; MoveToNextChar(); } return true; }
// if we really move forward, return true, which means we have the new m_TokenIndex // if we stay here, return false bool Tokenizer::SkipComment() { if (IsEOF()) return false; bool cstyle; // C or C++ style comments //check the comment prompt if (CurrentChar() == '/') { if (NextChar() == '*') cstyle = true; else if (NextChar() == '/') cstyle = false; else return false; // Not a comment, return false; } else return false; // Not a comment, return false; TRACE(_T("SkipComment() : Start from line = %d"), m_LineNumber); MoveToNextChar(2); // Skip the comment prompt // Here, we are in the comment body while (true) { if (cstyle) // C style comment { SkipToChar('/'); if (PreviousChar() == '*') // end of a C style comment { MoveToNextChar(); break; } if (!MoveToNextChar()) break; } else // C++ style comment { TRACE(_T("SkipComment() : Need to call SkipToEOL() here at line = %d"), m_LineNumber); SkipToInlineCommentEnd(); break; } } return true; }
// return true if we really skip a string, that means m_TokenIndex has changed. bool Tokenizer::SkipString() { if (IsEOF()) return false; const wxChar ch = CurrentChar(); if (ch == _T('"') || ch == _T('\'')) { // this is the case that match is inside a string! MoveToNextChar(); SkipToStringEnd(ch); MoveToNextChar(); return true; } return false; }
// Splits a parenthesised, comma-separated argument list into its arguments.
//
// results  each argument is appended as one string, its tokens joined by
//          single spaces. Nothing is appended when, after skipping leading
//          whitespace and comments, the cursor is not on '('.
//
// Only commas at the outermost level separate arguments; a comma inside
// nested parentheses, as in "(f(a, b), c)", stays part of its argument.
// Reading stops at the ')' matching the opening '(', at EOF, or when
// DoGetToken() returns an empty token.
void Tokenizer::SpliteArguments(wxArrayString& results)
{
    while (SkipWhiteSpace() || SkipComment())
        ;

    if (CurrentChar() != _T('('))
        return;

    MoveToNextChar(); // skip the '('

    int level = 1; // the opening '(' is already counted
    wxString piece;

    while (NotEOF())
    {
        const wxString token = DoGetToken();
        if (token.IsEmpty())
            break;

        if (token == _T("("))
            ++level;
        else if (token == _T(")"))
            --level;

        if (level == 1 && token == _T(","))
        {
            // top-level comma: the current argument is complete
            results.Add(piece);
            piece.Clear();
        }
        else if (level != 0)
        {
            // separate tokens by one space unless the piece already ends
            // with a blank
            if (!piece.IsEmpty() && piece.Last() > _T(' '))
                piece << _T(" ");
            piece << token;
        }

        if (level == 0)
        {
            // matching ')' reached: flush the last argument, if any
            if (!piece.IsEmpty())
                results.Add(piece);
            break;
        }

        while (SkipWhiteSpace() || SkipComment())
            ;
    }
}
bool Tokenizer::SkipToChar(const wxChar& ch) { // skip everything until we find ch while(true) { while (CurrentChar() != ch && MoveToNextChar()) // don't check EOF when MoveToNextChar already does ; if (IsEOF()) return false; if (PreviousChar() != '\\') break; else { // check for "\\" if (m_TokenIndex - 2 >= 0 && m_Buffer.GetChar(m_TokenIndex - 2) == '\\') break; } MoveToNextChar(); } return true; }
bool Tokenizer::SkipToEOL(bool nestBraces) { TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"), wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(), PreviousChar(), NextChar(), nestBraces ? 1 : 0); // skip everything until we find EOL for (;;) { while (NotEOF() && CurrentChar() != '\n') { if (CurrentChar() == '/' && NextChar() == '*') { SkipComment(); if (CurrentChar() == _T('\n')) break; } if (nestBraces && CurrentChar() == _T('{')) ++m_NestLevel; else if (nestBraces && CurrentChar() == _T('}')) --m_NestLevel; MoveToNextChar(); } if (!IsBackslashBeforeEOL() || IsEOF()) break; else MoveToNextChar(); } TRACE(_T("SkipToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"), m_LineNumber, CurrentChar(), PreviousChar(), NextChar()); return NotEOF(); }
// Resets the column counter to 0 when the cursor is on a newline; otherwise
// leaves it unchanged.
void TextCutter::AdjustColumn()
{
    if (CurrentChar() != '\n')
        return;

    m_CurColumn = 0;
}
// Reads the rest of the current logical line and returns it.
//
// nestBraces     when true, '{' / '}' read on the way increment / decrement
//                m_NestLevel.
// stripUnneeded  when false, the raw buffer text between the cursor and the
//                position reached by SkipToEOL() is returned. When true,
//                comments are removed, leading and trailing blanks (any
//                character <= ' ') are dropped, a blank equal to the
//                character just written is dropped, and lines for which
//                IsBackslashBeforeEOL() holds are joined with the next one.
wxString Tokenizer::ReadToEOL(bool nestBraces, bool stripUnneeded)
{
    if (!stripUnneeded)
    {
        const unsigned int from = m_TokenIndex;
        SkipToEOL(nestBraces);
        return m_Buffer.Mid(from, m_TokenIndex - from);
    }

    TRACE(_T("%s : line=%d, CurrentChar='%c', PreviousChar='%c', NextChar='%c', nestBrace(%d)"),
          wxString(__PRETTY_FUNCTION__, wxConvUTF8).wc_str(), m_LineNumber, CurrentChar(),
          PreviousChar(), NextChar(), nestBraces ? 1 : 0);

    // Characters are collected in a fixed local buffer that is flushed into
    // the result string whenever it fills up.
    static const size_t maxBufferLen = 4094;
    wxChar buffer[maxBufferLen + 2];
    wxChar* out = buffer;
    wxString line;

    for (;;)
    {
        while (NotEOF() && CurrentChar() != _T('\n'))
        {
            while (SkipComment())
            {
            }

            const wxChar ch = CurrentChar();
            if (ch == _T('\n'))
                break;

            // Drop a blank at the very start of the buffer and a blank that
            // repeats the character just written.
            const bool redundantBlank = ch <= _T(' ') && (out == buffer || *(out - 1) == ch);
            if (!redundantBlank)
            {
                *out = ch;
                ++out;

                if (out >= buffer + maxBufferLen)
                {
                    line.Append(buffer, out - buffer);
                    out = buffer;
                }

                if (nestBraces)
                {
                    if (ch == _T('{'))
                        ++m_NestLevel;
                    else if (ch == _T('}'))
                        --m_NestLevel;
                }
            }

            MoveToNextChar();
        }

        if (!IsBackslashBeforeEOL() || IsEOF())
            break;

        // Continued line: back up over trailing blanks. The pre-decrement
        // leaves `out` pointing AT the first non-blank character found, so
        // the next write overwrites it (presumably the continuation
        // backslash -- confirm).
        while (out > buffer && *(--out) <= _T(' '))
        {
        }
        MoveToNextChar();
    }

    // trim trailing blanks, then flush what is left
    while (out > buffer && *(out - 1) <= _T(' '))
        --out;

    if (out > buffer)
        line.Append(buffer, out - buffer);

    TRACE(_T("ReadToEOL(): (END) We are now at line %d, CurrentChar='%c', PreviousChar='%c', NextChar='%c'"),
          m_LineNumber, CurrentChar(), PreviousChar(), NextChar());
    TRACE(_T("ReadToEOL(): %s"), line.wx_str());

    return line;
}
// Reads a parenthesised expression starting at the cursor and appends a
// normalised copy of it to str.
//
// Normalisation, as implemented below:
//   - comments are removed and handled conditional preprocessor directives
//     are processed in place;
//   - string/char literals are copied verbatim;
//   - ',', '*' and '&' lose a preceding blank and are followed by one space;
//   - '=' is written as " = ", or as "= " when it follows a blank or one of
//     '=', '!', '>', '<' (so "==", "!=", ">=", "<=" stay together);
//   - '\r' and '\t' are dropped, repeated spaces and a space right after '('
//     are dropped;
//   - '\n' is kept (to keep parameter positions correct) unless it directly
//     follows '('.
//
// Reading stops after the character that brings the parenthesis level back
// to zero, or at EOF. If a single literal is longer than the internal buffer
// the function returns early without appending what was buffered.
void Tokenizer::ReadParentheses(wxString& str)
{
    // Up to three characters are written per step before the flush check at
    // the bottom of the loop runs, hence the spare room after maxBufferLen.
    static const size_t maxBufferLen = 4093;
    wxChar buffer[maxBufferLen + 3];
    buffer[0] = _T('$'); // sentinel: makes *(p - 1) readable when p == realBuffer
    wxChar* realBuffer = buffer + 1;
    wxChar* p = realBuffer;

    int level = 0;

    while (NotEOF())
    {
        while (SkipComment())
            ;

        wxChar ch = CurrentChar();

        while (ch == _T('#')) // do not use if
        {
            const PreprocessorType type = GetPreprocessorType();
            if (type == ptOthers)
                break;
            HandleConditionPreprocessor(type);
            ch = CurrentChar();
        }

        const unsigned int startIndex = m_TokenIndex;

        switch (ch)
        {
        case _T('('):
            {
                ++level;
                *p = ch;
                ++p;
            }
            break;

        case _T(')'):
            {
                if (*(p - 1) <= _T(' '))
                    --p;
                --level;
                *p = ch;
                ++p;
            }
            break;

        case _T('\''):
        case _T('"'):
            {
                MoveToNextChar();
                SkipToStringEnd(ch);
                MoveToNextChar();

                const size_t writeLen = m_TokenIndex - startIndex;
                const size_t usedLen  = p - realBuffer;

                // ">=" rather than ">": this path skips the flush check at
                // the bottom of the loop, so the buffer must keep the same
                // headroom that check guarantees.
                if (usedLen + writeLen >= maxBufferLen)
                {
                    if (writeLen > maxBufferLen)
                    {
                        TRACE(_T("ReadParentheses, Catch Exception 1: %d"), writeLen);
                        return;
                    }

                    if (p != realBuffer)
                    {
                        str.Append(realBuffer, usedLen);
                        p = realBuffer;
                    }

                    str.Append((const wxChar*)m_Buffer + startIndex, writeLen);
                }
                else
                {
                    memcpy(p, (const wxChar*)m_Buffer + startIndex, writeLen * sizeof(wxChar));
                    p += writeLen;
                }

                // the cursor is already past the literal
                continue;
            }
            break;

        case _T(','):
        case _T('*'):
        case _T('&'):
            {
                // drop a preceding blank, then write "<ch> "
                if (*(p - 1) <= _T(' '))
                    --p;
                *p = ch;
                *++p = _T(' ');
                ++p;
            }
            break;

        case _T('='):
            {
                const wxChar prev = *(p - 1);
                const bool joinsPrevious = prev <= _T(' ')
                                        || prev == _T('=')
                                        || prev == _T('!')
                                        || prev == _T('>')
                                        || prev == _T('<');
                if (joinsPrevious)
                {
                    // after a blank, or second half of ==, !=, >=, <=
                    *p = _T('=');
                    *++p = _T(' ');
                    ++p;
                }
                else
                {
                    *p = _T(' ');
                    *++p = _T('=');
                    *++p = _T(' ');
                    ++p;
                }
            }
            break;

        case _T(' '):
            {
                if (*(p - 1) != _T(' ') && *(p - 1) != _T('('))
                {
                    *p = _T(' ');
                    ++p;
                }
            }
            break;

        case _T('\r'):
        case _T('\t'):
            break;

        case _T('\n'): // we need to keep the \n to record the parameters' correct position
            if (*(p - 1) == _T(' '))
                --p;
            if (*(p - 1) != _T('('))
            {
                *p = ch;
                ++p;
            }
            break;

        default:
            {
                *p = ch;
                ++p;
            }
            break;
        }

        if (p >= realBuffer + maxBufferLen)
        {
            str.Append(realBuffer, p - realBuffer);
            p = realBuffer;
        }

        MoveToNextChar();

        if (level == 0)
            break;
    }

    if (p > realBuffer)
        str.Append(realBuffer, p - realBuffer);

    TRACE(_T("ReadParentheses(): %s, line=%d"), str.wx_str(), m_LineNumber);
    if (str.Len() > 512)
        TRACE(_T("ReadParentheses: Catch Exception 2?: %d"), str.Len());
}
// Advances until the cursor is on a newline or the text is exhausted. The
// newline itself is not consumed.
void TextCutter::SkipToEOL()
{
    for (;;)
    {
        if (IsEOF() || CurrentChar() == '\n')
            return;
        MoveToNextChar();
    }
}