Пример #1
0
/// <summary>
/// Parse a decimal integer whose first byte sits one position before lexer->src on entry.
/// If the digits turn out to be followed by a lone '.', the input is rewound to that first
/// byte and the whole number is handed to Lexer_ParseReal instead.
/// </summary>
/// <param name="lexer">The lexical analyzer.</param>
/// <param name="isFirstContentOnLine">Passed through unchanged to Lexer_ParseReal.</param>
/// <returns>The result of END_TOKEN(TOKEN_ERROR) on failure; otherwise whatever
/// Lexer_ParseReal or ProcessIntegerValue returns.</returns>
Int Lexer_ParseDigit(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *firstByte = src - 1;
	const Byte *numberEnd;
	Token token = lexer->token;		// Not named below; presumably consumed by START_TOKEN/END_TOKEN.
	UInt64 parsedValue;
	String typeSuffix;

	START_TOKEN(src - 1);

	// Read the decimal digits themselves.
	if (!ParseDecimalInteger(lexer, &parsedValue)) {
		lexer->token->text = IllegalDecimalIntegerMessage;
		return END_TOKEN(TOKEN_ERROR);
	}

	// Remember where the digits stopped, before any suffix is consumed.
	src = lexer->src;
	numberEnd = src;

	// A '.' that is not the start of a ".." means this is really a real or float value,
	// so start over from the first byte and let the real-number parser handle it.
	if (src < lexer->end && *src == '.' && (src + 1 == lexer->end || src[1] != '.')) {
		lexer->src = firstByte;
		return Lexer_ParseReal(lexer, isFirstContentOnLine);
	}

	// Otherwise this is a complete decimal integer: pick up any suffix and finish it.
	typeSuffix = CollectAlphanumericSuffix(lexer);

	// NOTE(review): no message is assigned to the token on this failure path; confirm
	// that EnsureEndOfNumber reports the error text itself.
	if (!EnsureEndOfNumber(lexer))
		return END_TOKEN(TOKEN_ERROR);

	END_TOKEN(TOKEN_INTEGER32);
	return ProcessIntegerValue(lexer, parsedValue, String_Create(firstByte, numberEnd - firstByte), typeSuffix);
}
Пример #2
0
/// <summary>
/// Having seen a slash '/' on the input, parse either a comment or a punctuation name.
/// </summary>
/// <remarks>
/// On entry lexer->src points at the byte just after the slash.  A "//" comment runs up to
/// (but not including) the next '\n' or '\r'; a "/*" comment runs through the matching "*/",
/// bumping lexer->line once per "\n", "\r", "\n\r" or "\r\n" line break inside it.  Anything
/// else rewinds to the slash itself and is delegated to Lexer_ParsePunctuation.
/// </remarks>
/// <param name="lexer">The lexical analyzer.</param>
/// <param name="isFirstContentOnLine">Whether this slash is the first non-whitespace non-comment content on its line.</param>
/// <returns>TOKEN_NONE if a comment was consumed, the result of END_TOKEN(TOKEN_ERROR) for an
/// unterminated multi-line comment, or whatever Lexer_ParsePunctuation returns otherwise.</returns>
Int Lexer_ParseSlash(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	Token token = lexer->token;		// Not named below; presumably consumed by START_TOKEN/END_TOKEN.
	Int startLine;
	Byte ch;

	// The slash may be the very last byte of the input.  Do not read past the end of the
	// buffer in that case: there can be no comment, so treat it as punctuation, rewinding
	// to the slash exactly as the default case below does.
	if (src >= end) {
		lexer->src = src - 1;
		return Lexer_ParsePunctuation(lexer, isFirstContentOnLine);
	}

	switch (*src++) {

		// A // single-line comment.
		case '/':
			// Stop at the line break without consuming it.
			while (src < end && (ch = *src) != '\n' && ch != '\r')
				src++;
			lexer->src = src;
			return TOKEN_NONE;

		// A multi-line comment.
		case '*':
			// Remember where the comment opened, for the error message.
			startLine = lexer->line;

			for (;;) {
				if (src >= end) {
					// Ran out of input before finding the closing "*/".
					// NOTE(review): lexer->src is not advanced on this path even though
					// lexer->line may have been; confirm callers stop lexing after this error.
					START_TOKEN(src - 1);
					lexer->token->text = String_FormatString(UnterminatedCommentMessage, startLine);
					return END_TOKEN(TOKEN_ERROR);
				}

				ch = *src++;
				if (ch == '\n') {
					// Treat "\n\r" as a single line break.
					if (src < end && *src == '\r')
						src++;
					lexer->line++;
				}
				else if (ch == '\r') {
					// Treat "\r\n" as a single line break.
					if (src < end && *src == '\n')
						src++;
					lexer->line++;
				}

				if (ch == '*' && src < end && *src == '/') {
					src++;
					break;
				}
			}

			lexer->src = src;
			return TOKEN_NONE;

		default:
			// Not a comment: General punctuation.  Back up over both the byte just
			// examined and the slash before it.
			lexer->src = src - 2;
			return Lexer_ParsePunctuation(lexer, isFirstContentOnLine);
	}
}
Пример #3
0
/*
 * Lexer state machine: dispatches first on lex_state and then on the current
 * `lookahead` character.
 *
 * All control flow is carried by the macros START_LEXER, START_TOKEN, ADVANCE,
 * ACCEPT_TOKEN and LEX_ERROR, which are defined outside this function and also
 * supply `lookahead`. Presumably ADVANCE(n) consumes the character and moves
 * to state n, and ACCEPT_TOKEN / LEX_ERROR leave the function -- confirm
 * against the macro definitions. Because of that, the order of the `if`
 * statements inside each state matters and must not be rearranged.
 */
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
    START_LEXER();
    switch (lex_state) {
        // State 1: start of a token. Tab/space loops back here; LF, CR, a double
        // quote, or a decimal digit each lead to their own state. No case for
        // lookahead == 0 in this state.
        case 1:
            START_TOKEN();
            if ((lookahead == '\t') ||
                (lookahead == ' '))
                ADVANCE(1);
            if (lookahead == '\n')
                ADVANCE(2);
            if (lookahead == '\r')
                ADVANCE(3);
            if (lookahead == '\"')
                ADVANCE(4);
            if ('0' <= lookahead && lookahead <= '9')
                ADVANCE(11);
            LEX_ERROR();
        // States 2 and 3: a line feed / carriage return (reached from state 1).
        case 2:
            START_TOKEN();
            ACCEPT_TOKEN(anon_sym_LF);
        case 3:
            START_TOKEN();
            ACCEPT_TOKEN(anon_sym_CR);
        // States 4-9: after an opening double quote, require exactly the
        // characters h, e, l, l, o and then a closing double quote, one per state.
        case 4:
            if (lookahead == 'h')
                ADVANCE(5);
            LEX_ERROR();
        case 5:
            if (lookahead == 'e')
                ADVANCE(6);
            LEX_ERROR();
        case 6:
            if (lookahead == 'l')
                ADVANCE(7);
            LEX_ERROR();
        case 7:
            if (lookahead == 'l')
                ADVANCE(8);
            LEX_ERROR();
        case 8:
            if (lookahead == 'o')
                ADVANCE(9);
            LEX_ERROR();
        case 9:
            if (lookahead == '\"')
                ADVANCE(10);
            LEX_ERROR();
        // State 10: the full quoted "hello" literal has been matched.
        case 10:
            ACCEPT_TOKEN(anon_sym_DQUOTEhello_DQUOTE);
        // State 11: reached after one decimal digit; accepts immediately without
        // looking at any further characters.
        case 11:
            ACCEPT_TOKEN(aux_sym_SLASH_BSLASHd_SLASH);
        // State 12: tab, LF, CR and space all loop back here; only lookahead == 0
        // moves on (to state 13).
        case 12:
            START_TOKEN();
            if (lookahead == 0)
                ADVANCE(13);
            if ((lookahead == '\t') ||
                (lookahead == '\n') ||
                (lookahead == '\r') ||
                (lookahead == ' '))
                ADVANCE(12);
            LEX_ERROR();
        // State 13: reached when lookahead was 0.
        case 13:
            ACCEPT_TOKEN(ts_builtin_sym_end);
        // State 14: same transitions as state 1, except that lookahead == 0 is
        // also handled (state 13) and LF/CR go to states 15/16 rather than 2/3.
        case 14:
            START_TOKEN();
            if (lookahead == 0)
                ADVANCE(13);
            if ((lookahead == '\t') ||
                (lookahead == ' '))
                ADVANCE(14);
            if (lookahead == '\n')
                ADVANCE(15);
            if (lookahead == '\r')
                ADVANCE(16);
            if (lookahead == '\"')
                ADVANCE(4);
            if ('0' <= lookahead && lookahead <= '9')
                ADVANCE(11);
            LEX_ERROR();
        // States 15 and 16: a line feed / carriage return (reached from state 14
        // or from the error state).
        case 15:
            START_TOKEN();
            ACCEPT_TOKEN(anon_sym_LF);
        case 16:
            START_TOKEN();
            ACCEPT_TOKEN(anon_sym_CR);
        // Error state: its transitions are identical to those of state 14.
        case ts_lex_state_error:
            START_TOKEN();
            if (lookahead == 0)
                ADVANCE(13);
            if ((lookahead == '\t') ||
                (lookahead == ' '))
                ADVANCE(14);
            if (lookahead == '\n')
                ADVANCE(15);
            if (lookahead == '\r')
                ADVANCE(16);
            if (lookahead == '\"')
                ADVANCE(4);
            if ('0' <= lookahead && lookahead <= '9')
                ADVANCE(11);
            LEX_ERROR();
        // Any state number not listed above is a lex error.
        default:
            LEX_ERROR();
    }
}
Пример #4
0
/// <summary>
/// Parse a number whose first byte sits one position before lexer->src on entry.  Three
/// shapes are recognized: "0x" followed by hex digits (a hexadecimal integer), a bare "0x"
/// (decimal zero with an 'x' suffix), and anything else (an octal integer, or a real value
/// if the digits are followed by a lone '.').
/// </summary>
/// <param name="lexer">The lexical analyzer.</param>
/// <param name="isFirstContentOnLine">Passed through unchanged to Lexer_ParseReal.</param>
/// <returns>The result of END_TOKEN(TOKEN_ERROR) on failure; otherwise whatever
/// Lexer_ParseReal or ProcessIntegerValue returns.</returns>
Int Lexer_ParseZero(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *firstByte = src - 1;
	const Byte *numberEnd;
	Token token = lexer->token;		// Not named below; presumably consumed by START_TOKEN/END_TOKEN.
	Byte ch;
	UInt64 parsedValue;
	String typeSuffix;

	START_TOKEN(src - 1);

	if (src >= end || ((ch = *src) != 'x' && ch != 'X')) {
		// No 'x' follows: octal integer, or possibly a real value (if we find a '.').
		if (!ParseOctalInteger(lexer, &parsedValue)) {
			lexer->token->text = IllegalOctalIntegerMessage;
			return END_TOKEN(TOKEN_ERROR);
		}

		// Remember where the digits stopped, before any suffix is consumed.
		src = lexer->src;
		numberEnd = src;

		// A '.' that is not the start of a ".." means this is a real or float value,
		// so rewind to the first byte and re-parse the whole thing that way.
		if (src < lexer->end && *src == '.' && (src+1 >= lexer->end || src[1] != '.')) {
			lexer->src = firstByte;
			return Lexer_ParseReal(lexer, isFirstContentOnLine);
		}

		// A complete octal value: pick up any suffix and finish it.
		typeSuffix = CollectAlphanumericSuffix(lexer);
		if (!EnsureEndOfNumber(lexer)) {
			lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
			return END_TOKEN(TOKEN_ERROR);
		}

		END_TOKEN(TOKEN_INTEGER32);
		return ProcessIntegerValue(lexer, parsedValue, String_Create(firstByte, numberEnd - firstByte), typeSuffix);
	}

	// Step over the 'x' / 'X'; both remaining cases continue lexing from here.
	src++;
	lexer->src = src;

	if (src < end && (((ch = *src) >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))) {
		// At least one hex digit follows, so this is a hexadecimal integer.
		if (!ParseHexadecimalInteger(lexer, &parsedValue)) {
			lexer->token->text = IllegalHexadecimalIntegerMessage;
			return END_TOKEN(TOKEN_ERROR);
		}

		src = lexer->src;
		numberEnd = src;

		typeSuffix = CollectAlphanumericSuffix(lexer);
		if (!EnsureEndOfNumber(lexer)) {
			lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
			return END_TOKEN(TOKEN_ERROR);
		}

		END_TOKEN(TOKEN_INTEGER32);
		return ProcessIntegerValue(lexer, parsedValue, String_Create(firstByte, numberEnd - firstByte), typeSuffix);
	}

	// "0x" with no hex digits after it is not an error; it is decimal zero, as a byte.
	if (!EnsureEndOfNumber(lexer)) {
		lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("x%c", ch));
		return END_TOKEN(TOKEN_ERROR);
	}

	END_TOKEN(TOKEN_INTEGER32);
	return ProcessIntegerValue(lexer, 0, ZeroString, LowercaseXString);
}
Пример #5
0
/// <summary>
/// Parse a real or floating-point literal starting at lexer->src: optional integer digits,
/// an optional '.', optional fractional digits, and an optional alphanumeric type suffix.
/// </summary>
/// <remarks>
/// A single ', " or _ between two digits is accepted as a digit separator and is left out
/// of the collected text.  The suffix selects the kind of value produced:
/// none = Real64, "f" = Float64, "l" = Real128, "h" = Real32, "hf" = Float32 (each letter
/// in either case).  Every other suffix, including "lf", is reported as an illegal suffix.
/// </remarks>
/// <param name="lexer">The lexical analyzer.</param>
/// <param name="isFirstContentOnLine">Unused by this function.</param>
/// <returns>The result of END_TOKEN for TOKEN_REAL64, TOKEN_FLOAT64, TOKEN_REAL128,
/// TOKEN_REAL32 or TOKEN_FLOAT32 on success, or for TOKEN_ERROR if the number is malformed
/// or carries an unsupported suffix.</returns>
Int Lexer_ParseReal(Lexer lexer, Bool isFirstContentOnLine)
{
	DECLARE_INLINE_STRINGBUILDER(digitBuilder, 256);	// 256 is plenty for most numbers, but it can grow if necessary.
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start;
	Byte ch;
	Token token = lexer->token;
	const Byte *digits;
	String digitString, suffix;
	const Byte *suffixText;
	Float64 float64;

	UNUSED(isFirstContentOnLine);

	INIT_INLINE_STRINGBUILDER(digitBuilder);

	START_TOKEN(src);

	// 'start' always marks the first byte that has not yet been copied into digitBuilder.
	// Text is copied in runs: each time a digit separator is skipped, the run before it is
	// flushed and a new run begins just after the separator.
	start = src;

	// Collect integer digits.
	while (src < end && (ch = *src) >= '0' && ch <= '9') {
		src++;
		if (src + 1 < end
			&& ((ch = *src) == '\'' || ch == '\"' || ch == '_')
			&& src[1] >= '0' && src[1] <= '9') {
			if (src > start) {
				StringBuilder_Append(digitBuilder, start, 0, src - start);
			}
			src++;
			start = src;
		}
	}

	// Deliberately no flush here: the current run simply continues through the '.' and
	// into the fractional digits.  Flushing without also moving 'start' would copy the
	// integer digits a second time when the run is flushed later.

	// Collect the decimal point.
	if (src < end && *src == '.') {
		src++;

		// Collect fractional digits.
		while (src < end && (ch = *src) >= '0' && ch <= '9') {
			src++;
			if (src + 1 < end
				&& ((ch = *src) == '\'' || ch == '\"' || ch == '_')
				&& src[1] >= '0' && src[1] <= '9') {
				if (src > start) {
					StringBuilder_Append(digitBuilder, start, 0, src - start);
				}
				src++;
				start = src;
			}
		}
	}

	// Finally copy into the digitBuilder whatever's left of the last run.
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
	}
	lexer->src = src;

	// Make the result C-friendly, since strtod() below needs a terminated string.
	// NOTE(review): this NUL becomes part of digitString, and so of token->text; confirm
	// that is intended.
	StringBuilder_AppendByte(digitBuilder, '\0');

	// Extract out the raw text of the number.
	digitString = StringBuilder_ToString(digitBuilder);
	digits = String_GetBytes(digitString);

	// Get any trailing type identifiers.
	suffix = CollectAlphanumericSuffix(lexer);

	// And make sure the result is clean.
	if (!EnsureEndOfNumber(lexer)) {
		token->text = IllegalRealValueMessage;
		return END_TOKEN(TOKEN_ERROR);
	}

	suffixText = String_GetBytes(suffix);
	if (suffixText[0] == '\0') {
		// No suffix: Real64.
		if (!Real64_TryParse(digitString, &token->data.real64)) {
			token->text = IllegalRealValueMessage;
			return END_TOKEN(TOKEN_ERROR);
		}
		token->text = digitString;
		return END_TOKEN(TOKEN_REAL64);
	}
	else if (suffixText[0] == 'F' || suffixText[0] == 'f') {
		if (suffixText[1] == '\0') {
			// "f": Float64.
			float64 = strtod(digits, NULL);
			token->data.float64 = float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT64);
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'L' || suffixText[0] == 'l') {
		// 128-bit something-or-other.
		if (suffixText[1] == '\0') {
			// "l": Real128.
			if (!Real128_TryParse(digitString, &token->data.real128)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL128);
		}
		// "lf" would be Float128, which is not yet supported, so it is rejected
		// like any other unrecognized suffix.
		else goto badSuffix;
	}
	else if (suffixText[0] == 'H' || suffixText[0] == 'h') {
		// 32-bit something-or-other.
		if (suffixText[1] == '\0') {
			// "h": Real32.
			if (!Real32_TryParse(digitString, &token->data.real32)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL32);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// "hf": Float32, parsed at double precision and then narrowed.
			float64 = strtod(digits, NULL);
			token->data.float32 = (Float32)float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT32);
		}
		else goto badSuffix;
	}
	else goto badSuffix;

badSuffix:
	token->text = String_FormatString(IllegalNumericSuffixMessage, suffix);
	return END_TOKEN(TOKEN_ERROR);
}