static void createTokens (int startPosition, const String& lineText,
                              CodeDocument::Iterator& source,
                              CodeTokeniser* analyser,
                              OwnedArray <SyntaxToken>& newTokens)
    {
        CodeDocument::Iterator lastIterator (source);
        const int lineLength = lineText.length();

        for (;;)
        {
            int tokenType = analyser->readNextToken (source);
            int tokenStart = lastIterator.getPosition();
            int tokenEnd = source.getPosition();

            if (tokenEnd <= tokenStart)
                break;

            tokenEnd -= startPosition;

            if (tokenEnd > 0)
            {
                tokenStart -= startPosition;
                newTokens.add (new SyntaxToken (lineText.substring (jmax (0, tokenStart), tokenEnd),
                                                tokenType));

                if (tokenEnd >= lineLength)
                    break;
            }

            lastIterator = source;
        }

        source = lastIterator;
    }
void CodeEditorComponent::getIteratorForPosition (int position, CodeDocument::Iterator& source)
{
    if (codeTokeniser == 0)
        return;

    for (int i = cachedIterators.size(); --i >= 0;)
    {
        CodeDocument::Iterator* t = cachedIterators.getUnchecked (i);
        if (t->getPosition() <= position)
        {
            source = *t;
            break;
        }
    }

    while (source.getPosition() < position)
    {
        const CodeDocument::Iterator original (source);
        codeTokeniser->readNextToken (source);

        if (source.getPosition() > position || source.isEOF())
        {
            source = original;
            break;
        }
    }
}
    int parseIdentifier (CodeDocument::Iterator& source) noexcept
    {
        int tokenLength = 0;
        String::CharPointerType::CharType possibleIdentifier [100];
        String::CharPointerType possible (possibleIdentifier);

        while (isIdentifierBody (source.peekNextChar()))
        {
            const juce_wchar c = source.nextChar();

            if (tokenLength < 20)
                possible.write (c);

            ++tokenLength;
        }

        if (tokenLength > 1 && tokenLength <= 16)
        {
            possible.writeNull();

            if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
                return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;
        }

        return CPlusPlusCodeTokeniser::tokenType_identifier;
    }
void CodeEditorComponent::updateCachedIterators (int maxLineNum)
{
    const int maxNumCachedPositions = 5000;
    const int linesBetweenCachedSources = jmax (10, document.getNumLines() / maxNumCachedPositions);

    if (cachedIterators.size() == 0)
        cachedIterators.add (new CodeDocument::Iterator (document));

    if (codeTokeniser != nullptr)
    {
        for (;;)
        {
            CodeDocument::Iterator& last = *cachedIterators.getLast();

            if (last.getLine() >= maxLineNum)
                break;

            CodeDocument::Iterator* t = new CodeDocument::Iterator (last);
            cachedIterators.add (t);
            const int targetLine = last.getLine() + linesBetweenCachedSources;

            for (;;)
            {
                codeTokeniser->readNextToken (*t);

                if (t->getLine() >= targetLine)
                    break;

                if (t->isEOF())
                    return;
            }
        }
    }
}
Esempio n. 5
0
int CtrlrMIDIBufferTokeniser::readNextToken (CodeDocument::Iterator &source)
{
	const juce_wchar c 	= source.peekNextChar();
	int result  		= CtrlrMIDIBufferTokeniser::tokenType_rawData;
	int pos 			= source.getPosition();
	int prefixHigh		= (tokenSet.prefixLen*3)-1;
	int suffixLow		= ((tokenSet.len) - (tokenSet.suffixLen*3));
	int suffixHigh		= tokenSet.len;
	int dataLow			= (tokenSet.dataOffset*3)-1;
	int dataHigh		= dataLow + ((tokenSet.dataLen*3));
	int nameLow			= (tokenSet.nameOffset*3)-1;
	int nameHigh		= nameLow + (tokenSet.nameLen*3);

	source.skipWhitespace();

	switch (c)
	{
		case 0:
			source.skip();
            return (result);
	}

	if (owner.getUnequalPosition().contains(pos))
	{
		source.skip();
		return (tokenType_mismatch);
	}

	if (pos >= 0 && pos < prefixHigh && prefixHigh>0)
	{
		source.skip();
		return (tokenType_prefix);
	}

	if (pos >= suffixLow && pos <= suffixHigh)
	{
		source.skip();
		return (tokenType_suffix);
	}

	if (pos >= dataLow && pos < dataHigh)
	{
		source.skip();
		return (tokenType_data);
	}

	if (pos >= nameLow && pos < nameHigh)
	{
		source.skip();
		return (tokenType_name);
	}
	source.skip();
	return result;
}
    bool skipNumberSuffix (CodeDocument::Iterator& source)
    {
        const juce_wchar c = source.peekNextChar();
        if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
            source.skip();

        if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
            return false;

        return true;
    }
    bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
    {
        if (source.nextChar() != '0')
            return false;

        if (! isOctalDigit (source.nextChar()))
             return false;

        while (isOctalDigit (source.peekNextChar()))
            source.skip();

        return skipNumberSuffix (source);
    }
    bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
    {
        int numChars = 0;
        while (isDecimalDigit (source.peekNextChar()))
        {
            ++numChars;
            source.skip();
        }

        if (numChars == 0)
            return false;

        return skipNumberSuffix (source);
    }
    void skipQuotedString (CodeDocument::Iterator& source) noexcept
    {
        const juce_wchar quote = source.nextChar();

        for (;;)
        {
            const juce_wchar c = source.nextChar();

            if (c == quote || c == 0)
                break;

            if (c == '\\')
                source.skip();
        }
    }
    int parseNumber (CodeDocument::Iterator& source)
    {
        const CodeDocument::Iterator original (source);

        if (parseFloatLiteral (source))
            return CPlusPlusCodeTokeniser::tokenType_floatLiteral;

        source = original;

        if (parseHexLiteral (source))
            return CPlusPlusCodeTokeniser::tokenType_integerLiteral;

        source = original;

        if (parseOctalLiteral (source))
            return CPlusPlusCodeTokeniser::tokenType_integerLiteral;

        source = original;

        if (parseDecimalLiteral (source))
            return CPlusPlusCodeTokeniser::tokenType_integerLiteral;

        source = original;
        source.skip();

        return CPlusPlusCodeTokeniser::tokenType_error;
    }
    bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
    {
        if (source.nextChar() != '0')
            return false;

        juce_wchar c = source.nextChar();
        if (c != 'x' && c != 'X')
            return false;

        int numDigits = 0;
        while (isHexDigit (source.peekNextChar()))
        {
            ++numDigits;
            source.skip();
        }

        if (numDigits == 0)
            return false;

        return skipNumberSuffix (source);
    }
    void skipComment (CodeDocument::Iterator& source) noexcept
    {
        bool lastWasStar = false;

        for (;;)
        {
            const juce_wchar c = source.nextChar();

            if (c == 0 || (c == '/' && lastWasStar))
                break;

            lastWasStar = (c == '*');
        }
    }
static void skipComment (CodeDocument::Iterator& source) throw()
{
    bool lastWasStar = false;

    for (;;)
    {
        const juce_wchar c = source.nextChar();

        if (c == 0 || (c == T('/') && lastWasStar))
            break;

        lastWasStar = (c == '*');
    }
}
//==============================================================================
int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
{
    int result = tokenType_error;
    source.skipWhitespace();

    juce_wchar firstChar = source.peekNextChar();

    switch (firstChar)
    {
    case 0:
        source.skip();
        break;

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        result = CppTokeniser::parseNumber (source);
        break;

    case '.':
        result = CppTokeniser::parseNumber (source);

        if (result == tokenType_error)
            result = tokenType_punctuation;

        break;

    case ',':
    case ';':
    case ':':
        source.skip();
        result = tokenType_punctuation;
        break;

    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
        source.skip();
        result = tokenType_bracket;
        break;

    case '"':
    case '\'':
        CppTokeniser::skipQuotedString (source);
        result = tokenType_stringLiteral;
        break;

    case '+':
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == '+')
            source.skip();
        else if (source.peekNextChar() == '=')
            source.skip();

        break;

    case '-':
        source.skip();
        result = CppTokeniser::parseNumber (source);

        if (result == tokenType_error)
        {
            result = tokenType_operator;

            if (source.peekNextChar() == '-')
                source.skip();
            else if (source.peekNextChar() == '=')
                source.skip();
        }
        break;

    case '*':
    case '%':
    case '=':
    case '!':
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == '=')
            source.skip();

        break;

    case '/':
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '/')
        {
            result = tokenType_comment;
            source.skipToEndOfLine();
        }
        else if (source.peekNextChar() == '*')
        {
            source.skip();
            result = tokenType_comment;
            CppTokeniser::skipComment (source);
        }

        break;

    case '?':
    case '~':
        source.skip();
        result = tokenType_operator;
        break;

    case '<':
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '<')
        {
            source.skip();

            if (source.peekNextChar() == '=')
                source.skip();
        }

        break;

    case '>':
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '<')
        {
            source.skip();

            if (source.peekNextChar() == '=')
                source.skip();
        }

        break;

    case '|':
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '|')
        {
            source.skip();

            if (source.peekNextChar() == '=')
                source.skip();
        }

        break;

    case '&':
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '&')
        {
            source.skip();

            if (source.peekNextChar() == '=')
                source.skip();
        }

        break;

    case '^':
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == '=')
        {
            source.skip();
        }
        else if (source.peekNextChar() == '^')
        {
            source.skip();

            if (source.peekNextChar() == '=')
                source.skip();
        }

        break;

    case '#':
        result = tokenType_preprocessor;
        source.skipToEndOfLine();
        break;

    default:
        if (CppTokeniser::isIdentifierStart (firstChar))
            result = CppTokeniser::parseIdentifier (source);
        else
            source.skip();

        break;
    }

    return result;
}
    bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
    {
        int numDigits = 0;

        while (isDecimalDigit (source.peekNextChar()))
        {
            source.skip();
            ++numDigits;
        }

        const bool hasPoint = (source.peekNextChar() == '.');

        if (hasPoint)
        {
            source.skip();

            while (isDecimalDigit (source.peekNextChar()))
            {
                source.skip();
                ++numDigits;
            }
        }

        if (numDigits == 0)
            return false;

        juce_wchar c = source.peekNextChar();
        const bool hasExponent = (c == 'e' || c == 'E');

        if (hasExponent)
        {
            source.skip();

            c = source.peekNextChar();
            if (c == '+' || c == '-')
                source.skip();

            int numExpDigits = 0;
            while (isDecimalDigit (source.peekNextChar()))
            {
                source.skip();
                ++numExpDigits;
            }

            if (numExpDigits == 0)
                return false;
        }

        c = source.peekNextChar();
        if (c == 'f' || c == 'F')
            source.skip();
        else if (! (hasExponent || hasPoint))
            return false;

        return true;
    }
//==============================================================================
int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
{
    int result = tokenType_error;
    source.skipWhitespace();

    tchar firstChar = source.peekNextChar();

    switch (firstChar)
    {
    case 0:
        source.skip();
        break;

    case T('0'):
    case T('1'):
    case T('2'):
    case T('3'):
    case T('4'):
    case T('5'):
    case T('6'):
    case T('7'):
    case T('8'):
    case T('9'):
        result = CppTokeniser::parseNumber (source);
        break;

    case T('.'):
        result = CppTokeniser::parseNumber (source);

        if (result == tokenType_error)
            result = tokenType_punctuation;

        break;

    case T(','):
    case T(';'):
    case T(':'):
        source.skip();
        result = tokenType_punctuation;
        break;

    case T('('):
    case T(')'):
    case T('{'):
    case T('}'):
    case T('['):
    case T(']'):
        source.skip();
        result = tokenType_bracket;
        break;

    case T('"'):
    case T('\''):
        CppTokeniser::skipQuotedString (source);
        result = tokenType_stringLiteral;
        break;

    case T('+'):
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == T('+'))
            source.skip();
        else if (source.peekNextChar() == T('='))
            source.skip();

        break;

    case T('-'):
        source.skip();
        result = CppTokeniser::parseNumber (source);

        if (result == tokenType_error)
        {
            result = tokenType_operator;

            if (source.peekNextChar() == T('-'))
                source.skip();
            else if (source.peekNextChar() == T('='))
                source.skip();
        }
        break;

    case T('*'):
    case T('%'):
    case T('='):
    case T('!'):
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == T('='))
            source.skip();

        break;

    case T('/'):
        result = tokenType_operator;
        source.skip();

        if (source.peekNextChar() == T('='))
        {
            source.skip();
        }
        else if (source.peekNextChar() == T('/'))
        {
            result = tokenType_comment;
            source.skipToEndOfLine();
        }
        else if (source.peekNextChar() == T('*'))
        {
            source.skip();
            result = tokenType_comment;
            CppTokeniser::skipComment (source);
        }

        break;

    case T('?'):
    case T('~'):
        source.skip();
        result = tokenType_operator;
        break;

    case T('<'):
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == T('='))
        {
            source.skip();
        }
        else if (source.peekNextChar() == T('<'))
        {
            source.skip();

            if (source.peekNextChar() == T('='))
                source.skip();
        }

        break;

    case T('>'):
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == T('='))
        {
            source.skip();
        }
        else if (source.peekNextChar() == T('<'))
        {
            source.skip();

            if (source.peekNextChar() == T('='))
                source.skip();
        }

        break;

    case T('|'):
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == T('='))
        {
            source.skip();
        }
        else if (source.peekNextChar() == T('|'))
        {
            source.skip();

            if (source.peekNextChar() == T('='))
                source.skip();
        }

        break;

    case T('&'):
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == T('='))
        {
            source.skip();
        }
        else if (source.peekNextChar() == T('&'))
        {
            source.skip();

            if (source.peekNextChar() == T('='))
                source.skip();
        }

        break;

    case T('^'):
        source.skip();
        result = tokenType_operator;

        if (source.peekNextChar() == T('='))
        {
            source.skip();
        }
        else if (source.peekNextChar() == T('^'))
        {
            source.skip();

            if (source.peekNextChar() == T('='))
                source.skip();
        }

        break;

    case T('#'):
        result = tokenType_preprocessor;
        source.skipToEndOfLine();
        break;

    default:
        if (CppTokeniser::isIdentifierStart (firstChar))
            result = CppTokeniser::parseIdentifier (source);
        else
            source.skip();

        break;
    }

    //jassert (result != tokenType_unknown);
    return result;
}
static int parseIdentifier (CodeDocument::Iterator& source) throw()
{
    static const tchar* keywords2Char[] =
        { T("if"), T("do"), T("or"), 0 };

    static const tchar* keywords3Char[] =
        { T("for"), T("int"), T("new"), T("try"), T("xor"), T("and"), T("asm"), T("not"), 0 };

    static const tchar* keywords4Char[] =
        { T("bool"), T("void"), T("this"), T("true"), T("long"), T("else"), T("char"),
          T("enum"), T("case"), T("goto"), T("auto"), 0 };

    static const tchar* keywords5Char[] =
        {  T("while"), T("bitor"), T("break"), T("catch"), T("class"), T("compl"), T("const"), T("false"),
            T("float"), T("short"), T("throw"), T("union"), T("using"), T("or_eq"), 0 };

    static const tchar* keywords6Char[] =
        { T("return"), T("struct"), T("and_eq"), T("bitand"), T("delete"), T("double"), T("extern"),
          T("friend"), T("inline"), T("not_eq"), T("public"), T("sizeof"), T("static"), T("signed"),
          T("switch"), T("typeid"), T("wchar_t"), T("xor_eq"), 0};

    static const tchar* keywordsOther[] =
        { T("const_cast"), T("continue"), T("default"), T("explicit"), T("mutable"), T("namespace"),
          T("operator"), T("private"), T("protected"), T("register"), T("reinterpret_cast"), T("static_cast"),
          T("template"), T("typedef"), T("typename"), T("unsigned"), T("virtual"), T("volatile"),
          T("@implementation"), T("@interface"), T("@end"), T("@synthesize"), T("@dynamic"), T("@public"),
          T("@private"), T("@property"), T("@protected"), T("@class"), 0 };

    int tokenLength = 0;
    tchar possibleIdentifier [19];

    while (isIdentifierBody (source.peekNextChar()))
    {
        const tchar c = source.nextChar();

        if (tokenLength < numElementsInArray (possibleIdentifier) - 1)
            possibleIdentifier [tokenLength] = c;

        ++tokenLength;
    }

    if (tokenLength > 1 && tokenLength <= 16)
    {
        possibleIdentifier [tokenLength] = 0;
        const tchar** k;

        switch (tokenLength)
        {
            case 2:     k = keywords2Char; break;
            case 3:     k = keywords3Char; break;
            case 4:     k = keywords4Char; break;
            case 5:     k = keywords5Char; break;
            case 6:     k = keywords6Char; break;
            default:    k = keywordsOther; break;
        }

        int i = 0;
        while (k[i] != 0)
        {
            if (k[i][0] == possibleIdentifier[0] && CharacterFunctions::compare (k[i], possibleIdentifier) == 0)
                return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;

            ++i;
        }
    }

    return CPlusPlusCodeTokeniser::tokenType_identifier;
}