static void createTokens (int startPosition, const String& lineText,
                          CodeDocument::Iterator& source,
                          CodeTokeniser* analyser,
                          OwnedArray<SyntaxToken>& newTokens)
{
    CodeDocument::Iterator lastIterator (source);
    const int lineLength = lineText.length();

    for (;;)
    {
        int tokenType = analyser->readNextToken (source);
        int tokenStart = lastIterator.getPosition();
        int tokenEnd = source.getPosition();

        if (tokenEnd <= tokenStart)
            break;

        tokenEnd -= startPosition;

        if (tokenEnd > 0)
        {
            tokenStart -= startPosition;
            newTokens.add (new SyntaxToken (lineText.substring (jmax (0, tokenStart), tokenEnd),
                                            tokenType));

            if (tokenEnd >= lineLength)
                break;
        }

        lastIterator = source;
    }

    source = lastIterator;
}
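/*  A minimal sketch (not part of the original sources) of how createTokens() might be
    driven for one document line. CodeDocument::getLine(), CodeDocument::Position and
    CodeDocument::Iterator are standard JUCE classes; SyntaxToken and the tokeniser
    pointer are assumed to belong to the surrounding editor code, and a real caller
    would first move 'source' to the start of the line, e.g. via the cached iterators
    used by getIteratorForPosition() below.
*/
static void tokeniseLine (const CodeDocument& document, int lineNum,
                          CodeTokeniser* tokeniser, OwnedArray<SyntaxToken>& tokens)
{
    const String lineText (document.getLine (lineNum));                                // text of this line only
    const int lineStart = CodeDocument::Position (document, lineNum, 0).getPosition(); // absolute offset of the line start

    CodeDocument::Iterator source (document);   // assumption: repositioned to lineStart before use

    tokens.clear();
    createTokens (lineStart, lineText, source, tokeniser, tokens);
}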
void CodeEditorComponent::getIteratorForPosition (int position, CodeDocument::Iterator& source)
{
    if (codeTokeniser == nullptr)
        return;

    // Start from the nearest cached iterator at or before the target position...
    for (int i = cachedIterators.size(); --i >= 0;)
    {
        CodeDocument::Iterator* t = cachedIterators.getUnchecked (i);

        if (t->getPosition() <= position)
        {
            source = *t;
            break;
        }
    }

    // ...then step forward one token at a time, backing up if a token would overshoot.
    while (source.getPosition() < position)
    {
        const CodeDocument::Iterator original (source);
        codeTokeniser->readNextToken (source);

        if (source.getPosition() > position || source.isEOF())
        {
            source = original;
            break;
        }
    }
}
int parseIdentifier (CodeDocument::Iterator& source) noexcept
{
    int tokenLength = 0;
    String::CharPointerType::CharType possibleIdentifier [100];
    String::CharPointerType possible (possibleIdentifier);

    while (isIdentifierBody (source.peekNextChar()))
    {
        const juce_wchar c = source.nextChar();

        if (tokenLength < 20)  // anything longer can't be a reserved keyword, so stop copying
            possible.write (c);

        ++tokenLength;
    }

    if (tokenLength > 1 && tokenLength <= 16)
    {
        possible.writeNull();

        if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
            return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;
    }

    return CPlusPlusCodeTokeniser::tokenType_identifier;
}
void CodeEditorComponent::updateCachedIterators (int maxLineNum)
{
    const int maxNumCachedPositions = 5000;
    const int linesBetweenCachedSources = jmax (10, document.getNumLines() / maxNumCachedPositions);

    if (cachedIterators.size() == 0)
        cachedIterators.add (new CodeDocument::Iterator (document));

    if (codeTokeniser != nullptr)
    {
        // Extend the sparse cache of tokeniser positions (one every few lines) until it
        // covers maxLineNum, so later lookups never have to re-tokenise from the top.
        for (;;)
        {
            CodeDocument::Iterator& last = *cachedIterators.getLast();

            if (last.getLine() >= maxLineNum)
                break;

            CodeDocument::Iterator* t = new CodeDocument::Iterator (last);
            cachedIterators.add (t);
            const int targetLine = last.getLine() + linesBetweenCachedSources;

            for (;;)
            {
                codeTokeniser->readNextToken (*t);

                if (t->getLine() >= targetLine)
                    break;

                if (t->isEOF())
                    return;
            }
        }
    }
}
int CtrlrMIDIBufferTokeniser::readNextToken (CodeDocument::Iterator& source)
{
    const juce_wchar c = source.peekNextChar();
    int result = CtrlrMIDIBufferTokeniser::tokenType_rawData;
    const int pos = source.getPosition();

    // The offsets and lengths in tokenSet appear to be byte counts; they are scaled by 3
    // here to match the displayed text, where each byte occupies three characters ("XX ").
    const int prefixHigh = (tokenSet.prefixLen * 3) - 1;
    const int suffixLow  = tokenSet.len - (tokenSet.suffixLen * 3);
    const int suffixHigh = tokenSet.len;
    const int dataLow    = (tokenSet.dataOffset * 3) - 1;
    const int dataHigh   = dataLow + (tokenSet.dataLen * 3);
    const int nameLow    = (tokenSet.nameOffset * 3) - 1;
    const int nameHigh   = nameLow + (tokenSet.nameLen * 3);

    source.skipWhitespace();

    if (c == 0)
    {
        source.skip();
        return result;
    }

    if (owner.getUnequalPosition().contains (pos))
    {
        source.skip();
        return tokenType_mismatch;
    }

    if (pos >= 0 && pos < prefixHigh && prefixHigh > 0)
    {
        source.skip();
        return tokenType_prefix;
    }

    if (pos >= suffixLow && pos <= suffixHigh)
    {
        source.skip();
        return tokenType_suffix;
    }

    if (pos >= dataLow && pos < dataHigh)
    {
        source.skip();
        return tokenType_data;
    }

    if (pos >= nameLow && pos < nameHigh)
    {
        source.skip();
        return tokenType_name;
    }

    source.skip();
    return result;
}
bool skipNumberSuffix (CodeDocument::Iterator& source)
{
    const juce_wchar c = source.peekNextChar();

    if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
        source.skip();

    if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
        return false;

    return true;
}
bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
{
    if (source.nextChar() != '0')
        return false;

    if (! isOctalDigit (source.nextChar()))
        return false;

    while (isOctalDigit (source.peekNextChar()))
        source.skip();

    return skipNumberSuffix (source);
}
bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
{
    int numChars = 0;

    while (isDecimalDigit (source.peekNextChar()))
    {
        ++numChars;
        source.skip();
    }

    if (numChars == 0)
        return false;

    return skipNumberSuffix (source);
}
void skipQuotedString (CodeDocument::Iterator& source) noexcept
{
    const juce_wchar quote = source.nextChar();

    for (;;)
    {
        const juce_wchar c = source.nextChar();

        if (c == quote || c == 0)
            break;

        if (c == '\\')
            source.skip();
    }
}
int parseNumber (CodeDocument::Iterator& source)
{
    const CodeDocument::Iterator original (source);

    if (parseFloatLiteral (source))    return CPlusPlusCodeTokeniser::tokenType_floatLiteral;
    source = original;

    if (parseHexLiteral (source))      return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
    source = original;

    if (parseOctalLiteral (source))    return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
    source = original;

    if (parseDecimalLiteral (source))  return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
    source = original;

    source.skip();
    return CPlusPlusCodeTokeniser::tokenType_error;
}
bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
{
    if (source.nextChar() != '0')
        return false;

    juce_wchar c = source.nextChar();

    if (c != 'x' && c != 'X')
        return false;

    int numDigits = 0;

    while (isHexDigit (source.peekNextChar()))
    {
        ++numDigits;
        source.skip();
    }

    if (numDigits == 0)
        return false;

    return skipNumberSuffix (source);
}
void skipComment (CodeDocument::Iterator& source) noexcept
{
    bool lastWasStar = false;

    for (;;)
    {
        const juce_wchar c = source.nextChar();

        if (c == 0 || (c == '/' && lastWasStar))
            break;

        lastWasStar = (c == '*');
    }
}
static void skipComment (CodeDocument::Iterator& source) throw()
{
    bool lastWasStar = false;

    for (;;)
    {
        const juce_wchar c = source.nextChar();

        if (c == 0 || (c == T('/') && lastWasStar))
            break;

        lastWasStar = (c == '*');
    }
}
//==============================================================================
int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
{
    int result = tokenType_error;
    source.skipWhitespace();

    juce_wchar firstChar = source.peekNextChar();

    switch (firstChar)
    {
        case 0:
            source.skip();
            break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            result = CppTokeniser::parseNumber (source);
            break;

        case '.':
            result = CppTokeniser::parseNumber (source);

            if (result == tokenType_error)
                result = tokenType_punctuation;

            break;

        case ',':
        case ';':
        case ':':
            source.skip();
            result = tokenType_punctuation;
            break;

        case '(': case ')':
        case '{': case '}':
        case '[': case ']':
            source.skip();
            result = tokenType_bracket;
            break;

        case '"':
        case '\'':
            CppTokeniser::skipQuotedString (source);
            result = tokenType_stringLiteral;
            break;

        case '+':
            result = tokenType_operator;
            source.skip();

            if (source.peekNextChar() == '+')
                source.skip();
            else if (source.peekNextChar() == '=')
                source.skip();

            break;

        case '-':
            source.skip();
            result = CppTokeniser::parseNumber (source);

            if (result == tokenType_error)
            {
                result = tokenType_operator;

                if (source.peekNextChar() == '-')
                    source.skip();
                else if (source.peekNextChar() == '=')
                    source.skip();
            }

            break;

        case '*':
        case '%':
        case '=':
        case '!':
            result = tokenType_operator;
            source.skip();

            if (source.peekNextChar() == '=')
                source.skip();

            break;

        case '/':
            result = tokenType_operator;
            source.skip();

            if (source.peekNextChar() == '=')
            {
                source.skip();
            }
            else if (source.peekNextChar() == '/')
            {
                result = tokenType_comment;
                source.skipToEndOfLine();
            }
            else if (source.peekNextChar() == '*')
            {
                source.skip();
                result = tokenType_comment;
                CppTokeniser::skipComment (source);
            }

            break;

        case '?':
        case '~':
            source.skip();
            result = tokenType_operator;
            break;

        case '<':
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == '=')
            {
                source.skip();
            }
            else if (source.peekNextChar() == '<')
            {
                source.skip();

                if (source.peekNextChar() == '=')
                    source.skip();
            }

            break;

        case '>':
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == '=')
            {
                source.skip();
            }
            else if (source.peekNextChar() == '>')   // ">>" and ">>="
            {
                source.skip();

                if (source.peekNextChar() == '=')
                    source.skip();
            }

            break;

        case '|':
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == '=')
            {
                source.skip();
            }
            else if (source.peekNextChar() == '|')
            {
                source.skip();

                if (source.peekNextChar() == '=')
                    source.skip();
            }

            break;

        case '&':
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == '=')
            {
                source.skip();
            }
            else if (source.peekNextChar() == '&')
            {
                source.skip();

                if (source.peekNextChar() == '=')
                    source.skip();
            }

            break;

        case '^':
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == '=')
            {
                source.skip();
            }
            else if (source.peekNextChar() == '^')
            {
                source.skip();

                if (source.peekNextChar() == '=')
                    source.skip();
            }

            break;

        case '#':
            result = tokenType_preprocessor;
            source.skipToEndOfLine();
            break;

        default:
            if (CppTokeniser::isIdentifierStart (firstChar))
                result = CppTokeniser::parseIdentifier (source);
            else
                source.skip();

            break;
    }

    return result;
}
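/*  A minimal usage sketch (assumed, not from the original sources): driving a tokeniser
    over a whole CodeDocument and reporting each token's type and character range.
    'tokeniseWholeDocument' is a hypothetical helper; CodeDocument, CodeDocument::Iterator
    and CodeTokeniser::readNextToken() are the JUCE APIs used by the code above.
*/
static void tokeniseWholeDocument (const CodeDocument& document, CodeTokeniser& tokeniser)
{
    CodeDocument::Iterator it (document);

    while (! it.isEOF())
    {
        const int tokenStart = it.getPosition();
        const int tokenType  = tokeniser.readNextToken (it);   // advances 'it' past one token
        const int tokenEnd   = it.getPosition();

        DBG ("token type " << tokenType << " spans [" << tokenStart << ", " << tokenEnd << ")");
    }
}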
bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
{
    int numDigits = 0;

    while (isDecimalDigit (source.peekNextChar()))
    {
        source.skip();
        ++numDigits;
    }

    const bool hasPoint = (source.peekNextChar() == '.');

    if (hasPoint)
    {
        source.skip();

        while (isDecimalDigit (source.peekNextChar()))
        {
            source.skip();
            ++numDigits;
        }
    }

    if (numDigits == 0)
        return false;

    juce_wchar c = source.peekNextChar();
    const bool hasExponent = (c == 'e' || c == 'E');

    if (hasExponent)
    {
        source.skip();
        c = source.peekNextChar();

        if (c == '+' || c == '-')
            source.skip();

        int numExpDigits = 0;

        while (isDecimalDigit (source.peekNextChar()))
        {
            source.skip();
            ++numExpDigits;
        }

        if (numExpDigits == 0)
            return false;
    }

    c = source.peekNextChar();

    if (c == 'f' || c == 'F')
        source.skip();
    else if (! (hasExponent || hasPoint))
        return false;

    return true;
}
//==============================================================================
int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
{
    int result = tokenType_error;
    source.skipWhitespace();

    tchar firstChar = source.peekNextChar();

    switch (firstChar)
    {
        case 0:
            source.skip();
            break;

        case T('0'): case T('1'): case T('2'): case T('3'): case T('4'):
        case T('5'): case T('6'): case T('7'): case T('8'): case T('9'):
            result = CppTokeniser::parseNumber (source);
            break;

        case T('.'):
            result = CppTokeniser::parseNumber (source);

            if (result == tokenType_error)
                result = tokenType_punctuation;

            break;

        case T(','):
        case T(';'):
        case T(':'):
            source.skip();
            result = tokenType_punctuation;
            break;

        case T('('): case T(')'):
        case T('{'): case T('}'):
        case T('['): case T(']'):
            source.skip();
            result = tokenType_bracket;
            break;

        case T('"'):
        case T('\''):
            CppTokeniser::skipQuotedString (source);
            result = tokenType_stringLiteral;
            break;

        case T('+'):
            result = tokenType_operator;
            source.skip();

            if (source.peekNextChar() == T('+'))
                source.skip();
            else if (source.peekNextChar() == T('='))
                source.skip();

            break;

        case T('-'):
            source.skip();
            result = CppTokeniser::parseNumber (source);

            if (result == tokenType_error)
            {
                result = tokenType_operator;

                if (source.peekNextChar() == T('-'))
                    source.skip();
                else if (source.peekNextChar() == T('='))
                    source.skip();
            }

            break;

        case T('*'):
        case T('%'):
        case T('='):
        case T('!'):
            result = tokenType_operator;
            source.skip();

            if (source.peekNextChar() == T('='))
                source.skip();

            break;

        case T('/'):
            result = tokenType_operator;
            source.skip();

            if (source.peekNextChar() == T('='))
            {
                source.skip();
            }
            else if (source.peekNextChar() == T('/'))
            {
                result = tokenType_comment;
                source.skipToEndOfLine();
            }
            else if (source.peekNextChar() == T('*'))
            {
                source.skip();
                result = tokenType_comment;
                CppTokeniser::skipComment (source);
            }

            break;

        case T('?'):
        case T('~'):
            source.skip();
            result = tokenType_operator;
            break;

        case T('<'):
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == T('='))
            {
                source.skip();
            }
            else if (source.peekNextChar() == T('<'))
            {
                source.skip();

                if (source.peekNextChar() == T('='))
                    source.skip();
            }

            break;

        case T('>'):
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == T('='))
            {
                source.skip();
            }
            else if (source.peekNextChar() == T('>'))   // ">>" and ">>="
            {
                source.skip();

                if (source.peekNextChar() == T('='))
                    source.skip();
            }

            break;

        case T('|'):
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == T('='))
            {
                source.skip();
            }
            else if (source.peekNextChar() == T('|'))
            {
                source.skip();

                if (source.peekNextChar() == T('='))
                    source.skip();
            }

            break;

        case T('&'):
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == T('='))
            {
                source.skip();
            }
            else if (source.peekNextChar() == T('&'))
            {
                source.skip();

                if (source.peekNextChar() == T('='))
                    source.skip();
            }

            break;

        case T('^'):
            source.skip();
            result = tokenType_operator;

            if (source.peekNextChar() == T('='))
            {
                source.skip();
            }
            else if (source.peekNextChar() == T('^'))
            {
                source.skip();

                if (source.peekNextChar() == T('='))
                    source.skip();
            }

            break;

        case T('#'):
            result = tokenType_preprocessor;
            source.skipToEndOfLine();
            break;

        default:
            if (CppTokeniser::isIdentifierStart (firstChar))
                result = CppTokeniser::parseIdentifier (source);
            else
                source.skip();

            break;
    }

    //jassert (result != tokenType_unknown);

    return result;
}
static int parseIdentifier (CodeDocument::Iterator& source) throw()
{
    static const tchar* keywords2Char[] =
        { T("if"), T("do"), T("or"), 0 };

    static const tchar* keywords3Char[] =
        { T("for"), T("int"), T("new"), T("try"), T("xor"), T("and"), T("asm"), T("not"), 0 };

    static const tchar* keywords4Char[] =
        { T("bool"), T("void"), T("this"), T("true"), T("long"), T("else"), T("char"),
          T("enum"), T("case"), T("goto"), T("auto"), 0 };

    static const tchar* keywords5Char[] =
        { T("while"), T("bitor"), T("break"), T("catch"), T("class"), T("compl"), T("const"),
          T("false"), T("float"), T("short"), T("throw"), T("union"), T("using"), T("or_eq"), 0 };

    static const tchar* keywords6Char[] =
        { T("return"), T("struct"), T("and_eq"), T("bitand"), T("delete"), T("double"),
          T("extern"), T("friend"), T("inline"), T("not_eq"), T("public"), T("sizeof"),
          T("static"), T("signed"), T("switch"), T("typeid"), T("xor_eq"), 0 };

    static const tchar* keywordsOther[] =
        { T("const_cast"), T("continue"), T("default"), T("explicit"), T("mutable"),
          T("namespace"), T("operator"), T("private"), T("protected"), T("register"),
          T("reinterpret_cast"), T("static_cast"), T("template"), T("typedef"), T("typename"),
          T("unsigned"), T("virtual"), T("volatile"), T("wchar_t"),
          T("@implementation"), T("@interface"), T("@end"), T("@synthesize"), T("@dynamic"),
          T("@public"), T("@private"), T("@property"), T("@protected"), T("@class"), 0 };

    int tokenLength = 0;
    tchar possibleIdentifier [19];

    while (isIdentifierBody (source.peekNextChar()))
    {
        const tchar c = source.nextChar();

        if (tokenLength < numElementsInArray (possibleIdentifier) - 1)
            possibleIdentifier [tokenLength] = c;

        ++tokenLength;
    }

    if (tokenLength > 1 && tokenLength <= 16)
    {
        possibleIdentifier [tokenLength] = 0;
        const tchar** k;

        switch (tokenLength)
        {
            case 2:   k = keywords2Char; break;
            case 3:   k = keywords3Char; break;
            case 4:   k = keywords4Char; break;
            case 5:   k = keywords5Char; break;
            case 6:   k = keywords6Char; break;
            default:  k = keywordsOther; break;
        }

        int i = 0;

        while (k[i] != 0)
        {
            if (k[i][0] == possibleIdentifier[0]
                 && CharacterFunctions::compare (k[i], possibleIdentifier) == 0)
                return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;

            ++i;
        }
    }

    return CPlusPlusCodeTokeniser::tokenType_identifier;
}