Example #1
0
/**
 * Consumes whitespace and comments from the reader, keeping `line` and `pos`
 * up to date, and stops in front of the first character that starts neither.
 *
 * Recognised comment openers are '#' and "//" (both handled by
 * skipToLineEnd()) and "/ *"-style block comments (handled by
 * skipMultilineComment()). The character that ends the skipping is pushed
 * back onto the reader so the caller sees it next.
 */
void Lexal::skipSpaces()
{
    while (! reader.isEof()) {
        wchar_t current = reader.getNextChar();
        pos++;

        // Plain whitespace: only a newline affects the position counters.
        if (isWhiteSpace(current)) {
            if ('\n' == current) {
                pos = 0;
                line++;
            }
            continue;
        }

        // '#' starts a comment running to the end of the line.
        if ('#' == current) {
            skipToLineEnd();
            continue;
        }

        // A '/' may open a "//" or "/*" comment; peek one character ahead.
        if (('/' == current) && (! reader.isEof())) {
            wchar_t lookahead = reader.getNextChar();
            pos++;

            if ('/' == lookahead) {
                skipToLineEnd();
                continue;
            }
            if ('*' == lookahead) {
                skipMultilineComment(line, pos);
                continue;
            }

            // Not a comment after all: give the peeked character back.
            pos--;
            reader.ungetChar(lookahead);
        }

        // First significant character: push it back and stop skipping.
        pos--;
        reader.ungetChar(current);
        return;
    }
}
Example #2
0
/* Tokenizer::readToken
 * Skips leading whitespace and any enabled comment styles, then
 * reads the next token into token_current and moves past it.
 * A token is either a single special character, a double-quoted
 * string (quotes stripped, qstring set) or a run of ordinary
 * characters. If [toeol] is true, an unquoted token only ends at
 * a newline rather than at whitespace or a special character
 *******************************************************************/
void Tokenizer::readToken(bool toeol)
{
	token_current.clear();
	qstring = false;

	// Repeat the whitespace/comment scan until a pass skips no comment
	bool rescan;
	do
	{
		rescan = false;

		// Move past whitespace, giving up if the text ends
		while (isWhitespace(current[0]))
		{
			if (!incrementCurrent())
				return;
		}

		// C-style comments
		if (comments & CCOMMENTS)
		{
			// '//' line comment
			if (current + 1 < end && current[0] == '/' && current[1] == '/')
			{
				// With DECORATE handling on, the comment is only skipped
				// when a third character exists and it is not '$'
				const bool skip = !decorate || (current + 2 < end && current[2] != '$');
				if (skip)
				{
					skipLineComment();
					rescan = true;
				}
			}

			// '/*' multiline comment (checked at the possibly advanced position)
			if (current + 1 != end && current[0] == '/' && current[1] == '*')
			{
				skipMultilineComment();
				rescan = true;
			}
		}

		// '##' comments
		if ((comments & DCOMMENTS) && current + 1 != end && current[0] == '#' && current[1] == '#')
		{
			skipLineComment();
			rescan = true;
		}

		// '#' comments
		if ((comments & HCOMMENTS) && current + 1 != end && current[0] == '#')
		{
			skipLineComment();
			rescan = true;
		}

		// ';' comments
		if ((comments & SCOMMENTS) && current[0] == ';')
		{
			skipLineComment();
			rescan = true;
		}

		// Nothing left to read
		if (position == size)
			return;
	}
	while (rescan);

	// The token starts (and, so far, ends) at the current position
	t_start = position;
	t_end = position;

	// A special character is a complete token by itself
	if (isSpecialCharacter(current[0]))
	{
		token_current += current[0];
		t_end = position + 1;
		incrementCurrent();
		return;
	}

	if (current[0] != '\"')
	{
		// Unquoted token: stop at a newline, or at whitespace unless
		// reading to end of line
		while ((toeol || !isWhitespace(current[0])) && current[0] != '\n')
		{
			// A special character ends the token (and is left unread)
			if (!toeol && isSpecialCharacter(current[0]))
				return;

			token_current += current[0];

			// Stop if the text ends
			if (!incrementCurrent())
				return;
		}
	}
	else
	{
		// Quoted string: step over the opening ", keep everything up
		// to the closing " (whitespace included)
		qstring = true;
		incrementCurrent();

		while (current[0] != '\"')
		{
			// A backslash is dropped and the character after it is
			// taken literally
			if (current[0] == '\\')
				incrementCurrent();

			token_current += current[0];

			// Stop if the text ends before the closing "
			if (!incrementCurrent())
				return;
		}

		// Step over the closing "
		incrementCurrent();
	}

	// Log the token when debug mode is enabled
	if (debug)
		wxLogMessage(token_current);
}
Example #3
0
/**
 * Scans the input from location.ptr up to eof one character at a time and
 * emits tokens through storeToken()/storePreviousToken()/makeOperatorToken(),
 * finishing with Token::Eof.
 *
 * `state` tracks whether an identifier, integer or floating point number is
 * currently being collected; `start` marks where that token began.
 * NOTE(review): the comment skippers and literal makers are followed by
 * `continue`, so they presumably advance `location` themselves - confirm.
 */
void Lexer::tokenize() {
    while (location.ptr < eof) {
        const char ch = *location.ptr;
        bool atLineBreak = false;

        // ASCII letters and underscore can start or continue an identifier.
        const bool identifierChar = (ch >= 'A' && ch <= 'Z') ||
                                    (ch >= 'a' && ch <= 'z') ||
                                    ch == '_';
        const bool digitChar = (ch >= '0' && ch <= '9');

        if (ch == ' ' || ch == '\t' || ch == '\r') {
            // Blank characters terminate whatever token was being collected.
            storePreviousToken();
        } else if (ch == '\n') {
            atLineBreak = true;
            storePreviousToken();
            storeToken(Token::Newline);
        } else if (identifierChar) {
            if (state == Idle) {
                state = GettingIdentifier;
                start = location;
            } else if (state == GettingIntegerNumber ||
                       state == GettingFloatingPointNumber) {
                // A letter directly inside a number is not accepted.
                storeToken(Token::Invalid);
            }
        } else if (digitChar) {
            // Digits only start a number when idle; otherwise they simply
            // extend the token in progress.
            if (state == Idle) {
                state = GettingIntegerNumber;
                start = location;
            }
        } else if (ch == '/') {
            storePreviousToken();
            if (isNextChar('/', location.ptr)) {
                skipUntilNewline();
                continue;
            }
            if (isNextChar('*', location.ptr)) {
                skipMultilineComment();
                continue;
            }
            makeOperatorToken(location);
        } else if (ch == '"') {
            storePreviousToken();
            makeStringLiteral(location.ptr);
            continue;
        } else if (ch == '\'') {
            storePreviousToken();
            makeCharLiteral();
            continue;
        } else {
            switch (ch) {
            case '=': case '!': case '+': case '-': case '*': case '%':
            case '.': case ',': case '>': case '<': case ':': case ';':
            case '?': case '|': case '&': case '^': case '~':
            case '(': case ')': case '{': case '}': case '[': case ']':
                // A single '.' inside an integer turns it into a float;
                // every other operator character ends the pending token.
                if (ch == '.' && !isNextChar('.') &&
                        state == GettingIntegerNumber) {
                    state = GettingFloatingPointNumber;
                } else {
                    storePreviousToken();
                    makeOperatorToken(location);
                }
                break;
            default:
                storeToken(Token::Invalid);
                break;
            }
        }

        // Advance the source position by a line or by a column.
        if (atLineBreak) {
            location.stepLine();
        } else {
            location.stepColumn();
        }
    }
    storeToken(Token::Eof);
}
/*
 * Builds a copy of `source` with "//" and "/ * ... * /" comments left out.
 *
 * source: NUL-terminated input text; NULL yields NULL.
 * returns: the filtered text, assembled one character at a time with
 *          stradd(). When nothing is appended the result is the initial
 *          empty string literal, so callers must not assume the pointer is
 *          heap-allocated.
 *          NOTE(review): ownership of stradd()'s return value is not visible
 *          here - confirm before freeing.
 *
 * A '/' is held back until the following character shows whether it opens a
 * comment. If it does not, both the '/' and that character are emitted; a
 * '/' that ends the input is emitted as well. A multiline comment without a
 * terminator (skipMultilineComment() returns -1) is not treated as a
 * comment: its "/ *" opener is emitted and scanning continues after it.
 */
char * removeComments(char* source) {

    const int DEBUG = 0;
    const char FIRST_COMMENT_SYMBOL = '/';
    const char SINGLELINE = '/';
    const char MULTILINE = '*';

    char* result = "\0";
    int slashPending = 0;   /* previous character was an unresolved '/' */
    int i = 0;

    if (source == NULL) {
        return NULL;
    }

    if (DEBUG) {
        printf("ORIGINAL:[%s]\n", source);
    }

    int sourceLen = (int)strlen(source);

    for (i = 0; i < sourceLen; i++) {

        char curChar = source[i];

        if (!slashPending) {
            if (curChar == FIRST_COMMENT_SYMBOL) {
                /* Hold the '/' back until the next character is known. */
                slashPending = 1;
            } else {
                result = stradd(result, curChar);
            }
        } else {
            slashPending = 0;

            if (curChar == SINGLELINE) {
                /* The helper returns the index to resume from; the loop
                 * increment then moves past it. */
                i = skipSingleLineComment(i+1, sourceLen, source);
            } else if (curChar == MULTILINE) {
                int rez = skipMultilineComment(i+1, sourceLen, source);

                if (rez == -1) {
                    /* No terminator found: restore the two characters that
                     * were held back and keep scanning as normal text. */
                    result = stradd(result, FIRST_COMMENT_SYMBOL);
                    result = stradd(result, MULTILINE);
                } else {
                    i = rez;
                }
            } else {
                /* The '/' was ordinary text: emit it together with the
                 * current character, which must not be lost. */
                result = stradd(result, FIRST_COMMENT_SYMBOL);
                result = stradd(result, curChar);
            }
        }

        if (DEBUG == 2) {
            printf("RESULT:[%s]\n", result);
        }
    }

    /* A '/' as the very last character never got resolved inside the loop. */
    if (slashPending) {
        result = stradd(result, FIRST_COMMENT_SYMBOL);
    }

    if (DEBUG) {
        printf("RESULT:[%s]", result);
    }
    return result;
}