Exemple #1
0
/// <summary>
/// Check that the input at the lexer's current position does not continue with a
/// letter-like character, which would be trailing garbage after a number.
/// </summary>
/// <param name="lexer">The lexical analyzer; the code point at lexer->src is examined.</param>
/// <returns>True if the input is exhausted or the next code point is not an identifier
/// letter.  False if it is one; in that case lexer->src is moved past that code point and
/// lexer->token->text is set to a formatted TrailingGarbageAfterNumberMessage.</returns>
static Bool EnsureEndOfNumber(Lexer lexer)
{
	const Byte *cursor = lexer->src;
	const Byte *limit = lexer->end;
	StringBuilder offender;
	UInt kind;
	Int32 codePoint;

	// End of input: nothing can be trailing the number.
	if (cursor >= limit)
		return True;

	// Decode exactly one code point.  Bytes below 128 are taken as-is;
	// anything else is handed to the Unicode extractor, which advances the cursor itself.
	if (*cursor < 128) {
		codePoint = *cursor;
		cursor++;
	}
	else {
		codePoint = String_ExtractUnicodeCharacterInternal(&cursor, limit);
	}

	// Only letterforms (identifier start or middle letters) are rejected.
	kind = SmileIdentifierKind(codePoint);
	if (!(kind & (IDENTKIND_STARTLETTER | IDENTKIND_MIDDLELETTER)))
		return True;

	// Report the offending character, and leave the lexer positioned after it.
	offender = StringBuilder_Create();
	StringBuilder_AppendUnicode(offender, codePoint);
	lexer->src = cursor;
	lexer->token->text = String_FormatString(TrailingGarbageAfterNumberMessage, StringBuilder_ToString(offender));
	return False;
}
Exemple #2
0
/// <summary>
/// Having seen a slash '/' on the input, parse either a comment or a punctuation name.
/// On entry, lexer->src points at the character immediately after the slash.
/// </summary>
/// <param name="lexer">The lexical analyzer.</param>
/// <param name="isFirstContentOnLine">Whether this identifier is the first non-whitespace non-comment content on its line.</param>
/// <returns>TOKEN_NONE if a comment was consumed; TOKEN_ERROR if a multi-line comment was
/// never closed; otherwise whatever Lexer_ParsePunctuation() returns for the slash.</returns>
Int Lexer_ParseSlash(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	Token token = lexer->token;
	Int startLine;
	Byte ch;

	// A '/' that is the very last byte of the input cannot start a comment.  Treat it
	// as punctuation without reading the byte at 'end' (rewind to the slash itself).
	if (src >= end) {
		lexer->src = src - 1;
		return Lexer_ParsePunctuation(lexer, isFirstContentOnLine);
	}

	switch (*src++) {

		// A // single-line comment: skip up to (but not including) the line terminator.
		case '/':
			while (src < end && (ch = *src) != '\n' && ch != '\r')
				src++;
			lexer->src = src;
			return TOKEN_NONE;

		// A multi-line comment.
		case '*':
			// Remember where the comment began, for the unterminated-comment message.
			startLine = lexer->line;

			for (;;) {
				if (src >= end) {
					// NOTE(review): lexer->src is not updated on this path, so it still points
					// just after the opening '/' -- confirm that is what END_TOKEN/callers expect.
					START_TOKEN(src - 1);
					lexer->token->text = String_FormatString(UnterminatedCommentMessage, startLine);
					return END_TOKEN(TOKEN_ERROR);
				}

				// Count lines inside the comment; "\n\r" and "\r\n" pairs count as one line break.
				ch = *src++;
				if (ch == '\n') {
					if (src < end && *src == '\r')
						src++;
					lexer->line++;
				}
				else if (ch == '\r') {
					if (src < end && *src == '\n')
						src++;
					lexer->line++;
				}

				// The comment ends at the first "*/".
				if (ch == '*' && src < end && *src == '/') {
					src++;
					break;
				}
			}

			lexer->src = src;
			return TOKEN_NONE;

		default:
			// Not a comment: General punctuation.  Rewind over both the character just
			// read and the slash, so the punctuation parser sees the slash again.
			lexer->src = src - 2;
			return Lexer_ParsePunctuation(lexer, isFirstContentOnLine);
	}
}
Exemple #3
0
/// <summary>
/// Parse a real or float literal starting at lexer->src: decimal digits, an optional
/// '.' with fractional digits, and an optional alphabetic type suffix.  The separators
/// ', " and _ are allowed between two digits and are dropped from the collected text.
/// </summary>
/// <param name="lexer">The lexical analyzer; lexer->src is advanced past the literal.</param>
/// <param name="isFirstContentOnLine">Not consulted directly by this function.</param>
/// <returns>The token kind chosen by the suffix: none = TOKEN_REAL64, "f" = TOKEN_FLOAT64,
/// "l" = TOKEN_REAL128, "h" = TOKEN_REAL32, "hf" = TOKEN_FLOAT32 (suffix letters are
/// case-insensitive).  TOKEN_ERROR is returned, with a message in token->text, for
/// trailing garbage, an unparseable value, or any other suffix.</returns>
Int Lexer_ParseReal(Lexer lexer, Bool isFirstContentOnLine)
{
	DECLARE_INLINE_STRINGBUILDER(digitBuilder, 256);	// 256 is plenty for most numbers, but it can grow if necessary.
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start;
	Byte ch;
	Token token = lexer->token;
	const Byte *digits;
	String digitString, suffix;
	const Byte *suffixText;
	Float64 float64;

	UNUSED(isFirstContentOnLine);

	INIT_INLINE_STRINGBUILDER(digitBuilder);

	START_TOKEN(src);

	// Collect integer digits.  'start' always marks the first byte that has not yet
	// been copied into digitBuilder.
	start = src;
	while (src < end && (ch = *src) >= '0' && ch <= '9') {
		src++;
		// A separator that sits between two digits: flush what we have and skip it.
		if (src + 1 < end
			&& ((ch = *src) == '\'' || ch == '\"' || ch == '_')
			&& src[1] >= '0' && src[1] <= '9') {
			if (src > start) {
				StringBuilder_Append(digitBuilder, start, 0, src - start);
			}
			src++;
			start = src;
		}
	}

	// Copy into the digitBuilder whatever integer digits are left, and advance 'start'
	// past them so the appends below do not copy the same digits a second time.
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
	}
	start = src;

	// Collect the decimal point.  'start' stays on the '.', so it is copied along
	// with the first run of fractional digits.
	if (src < end && *src == '.') {
		src++;

		// Collect fractional digits.
		while (src < end && (ch = *src) >= '0' && ch <= '9') {
			src++;
			if (src + 1 < end
				&& ((ch = *src) == '\'' || ch == '\"' || ch == '_')
				&& src[1] >= '0' && src[1] <= '9') {
				if (src > start) {
					StringBuilder_Append(digitBuilder, start, 0, src - start);
				}
				src++;
				start = src;
			}
		}
	}

	// Finally copy into the digitBuilder whatever's left.
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
	}
	lexer->src = src;

	// Make the result C-friendly (strtod() below needs a NUL-terminated buffer).
	StringBuilder_AppendByte(digitBuilder, '\0');

	// Extract out the raw text of the number.
	digitString = StringBuilder_ToString(digitBuilder);
	digits = String_GetBytes(digitString);

	// Get any trailing type identifiers.
	suffix = CollectAlphanumericSuffix(lexer);

	// And make sure the result is clean.
	if (!EnsureEndOfNumber(lexer)) {
		token->text = IllegalRealValueMessage;
		return END_TOKEN(TOKEN_ERROR);
	}

	suffixText = String_GetBytes(suffix);
	if (suffixText[0] == '\0') {
		// Real64.
		if (!Real64_TryParse(digitString, &token->data.real64)) {
			token->text = IllegalRealValueMessage;
			return END_TOKEN(TOKEN_ERROR);
		}
		token->text = digitString;
		return END_TOKEN(TOKEN_REAL64);
	}
	else if (suffixText[0] == 'F' || suffixText[0] == 'f') {
		if (suffixText[1] == '\0') {
			// Float64.
			float64 = strtod(digits, NULL);
			token->data.float64 = float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT64);
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'L' || suffixText[0] == 'l') {
		// 128-bit something-or-other.
		if (suffixText[1] == '\0') {
			// Real128.
			if (!Real128_TryParse(digitString, &token->data.real128)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL128);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// Float128 (not yet supported).
			goto badSuffix;
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'H' || suffixText[0] == 'h') {
		// 32-bit something-or-other.
		if (suffixText[1] == '\0') {
			// Real32.
			if (!Real32_TryParse(digitString, &token->data.real32)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL32);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// Float32: parsed at double precision, then narrowed.
			float64 = strtod(digits, NULL);
			token->data.float32 = (Float32)float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT32);
		}
		else goto badSuffix;
	}
	else goto badSuffix;

badSuffix:
	token->text = String_FormatString(IllegalNumericSuffixMessage, suffix);
	return END_TOKEN(TOKEN_ERROR);
}
Exemple #4
0
/// <summary>
/// Parse a number whose first character has already been consumed (the leading '0',
/// going by this function's name): a hexadecimal integer ("0x..."), a zero byte ("0x"
/// with no hex digits after it), an octal integer, or -- when a single '.' follows the
/// digits -- a real value, which is re-parsed from the start by Lexer_ParseReal().
/// </summary>
/// <param name="lexer">The lexical analyzer; lexer->src points just past the first character.</param>
/// <param name="isFirstContentOnLine">Passed through to Lexer_ParseReal() when the number turns out to be real.</param>
/// <returns>The token kind produced by ProcessIntegerValue() or Lexer_ParseReal(), or
/// TOKEN_ERROR (with a message in lexer->token->text) for malformed digits or trailing garbage.</returns>
Int Lexer_ParseZero(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start, *digitsEnd, *trailing;
	Token token = lexer->token;
	Byte ch;
	UInt64 value;
	String suffix;

	START_TOKEN(src - 1);
	start = src - 1;

	if (src < end && ((ch = *src) == 'x' || ch == 'X')) {
		// Hexadecimal integer, or possibly a zero byte.
		src++;

		if (src >= end || !(((ch = *src) >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))) {
			// Not an error; this is decimal zero, as a byte.
			lexer->src = src;
			if (!EnsureEndOfNumber(lexer)) {
				// EnsureEndOfNumber() only fails when src < end, so 'ch' holds the offending byte here.
				lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("x%c", ch));
				return END_TOKEN(TOKEN_ERROR);
			}
			END_TOKEN(TOKEN_INTEGER32);
			return ProcessIntegerValue(lexer, 0, ZeroString, LowercaseXString);
		}
		else {
			// This is a hexadecimal integer.
			lexer->src = src;
			if (!ParseHexadecimalInteger(lexer, &value)) {
				lexer->token->text = IllegalHexadecimalIntegerMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			digitsEnd = src = lexer->src;
			suffix = CollectAlphanumericSuffix(lexer);

			// Remember where the possible garbage starts: on failure EnsureEndOfNumber()
			// moves lexer->src past the offending character, so *lexer->src would be the
			// byte after it (possibly at 'end') rather than the character to report.
			trailing = lexer->src;
			if (!EnsureEndOfNumber(lexer)) {
				lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *trailing));
				return END_TOKEN(TOKEN_ERROR);
			}
			END_TOKEN(TOKEN_INTEGER32);
			return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
		}
	}
	else {
		// Octal integer, or possibly a real value (if we find a '.').
		if (!ParseOctalInteger(lexer, &value)) {
			lexer->token->text = IllegalOctalIntegerMessage;
			return END_TOKEN(TOKEN_ERROR);
		}
		digitsEnd = src = lexer->src;
		if (src < lexer->end && *src == '.' && (src+1 >= lexer->end || src[1] != '.')) {
			// Found a '.' (and it's not part of a ".."), so rewind back and re-parse this as a real or float value.
			lexer->src = start;
			return Lexer_ParseReal(lexer, isFirstContentOnLine);
		}
		else {
			// Collected a whole octal value, so finish it.
			suffix = CollectAlphanumericSuffix(lexer);

			// As above: capture the position before EnsureEndOfNumber() can advance past it.
			trailing = lexer->src;
			if (!EnsureEndOfNumber(lexer)) {
				lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *trailing));
				return END_TOKEN(TOKEN_ERROR);
			}
			END_TOKEN(TOKEN_INTEGER32);
			return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
		}
	}
}
Exemple #5
0
/// <summary>
/// Finish an integer token: pick the token kind from the type suffix, range-check the
/// value against that kind, and store the value and text in lexer->token.
/// </summary>
/// <param name="lexer">The lexical analyzer whose current token is filled in.</param>
/// <param name="value">The unsigned magnitude that was parsed from the digits.</param>
/// <param name="text">The source text of the number, stored as the token's text on success.</param>
/// <param name="suffix">The type suffix: null/empty = Integer32, "l" = Integer64,
/// "h" = Integer16, "x" = Byte (case-insensitive, exactly one character).</param>
/// <returns>TOKEN_INTEGER32, TOKEN_INTEGER64, TOKEN_INTEGER16, or TOKEN_BYTE on success;
/// TOKEN_ERROR (with a message in lexer->token->text) if the value does not fit in the
/// requested size or the suffix is not recognized.</returns>
Inline Int ProcessIntegerValue(Lexer lexer, UInt64 value, String text, String suffix)
{
	const Byte *suffixText;
	Int suffixLength;

	if (String_IsNullOrEmpty(suffix)) {
		// No suffix: a 32-bit integer, so the value must fit in 32 bits.
		if (value >= (1ULL << 32)) {
			lexer->token->text = String_FormatString(IllegalNumericSizeMessage, "Integer32");
			return (lexer->token->kind = TOKEN_ERROR);
		}
		else {
			lexer->token->data.i = (Int32)(UInt32)value;
			lexer->token->text = text;
			return (lexer->token->kind = TOKEN_INTEGER32);
		}
	}

	// Only look inside the suffix once we know it is neither null nor empty.
	suffixText = String_GetBytes(suffix);
	suffixLength = String_Length(suffix);

	switch (suffixText[0]) {

		case 'l': case 'L':
			// Integer64: any UInt64 bit pattern is accepted.
			if (suffixLength == 1) {
				lexer->token->data.int64 = (Int64)value;
				lexer->token->text = text;
				return (lexer->token->kind = TOKEN_INTEGER64);
			}
			else goto unknown_suffix;

		case 'h': case 'H':
			if (suffixLength == 1) {
				// Integer16: the value must fit in 16 bits, so 65536 itself is already too big.
				if (value >= (1ULL << 16)) {
					lexer->token->text = String_FormatString(IllegalNumericSizeMessage, "Integer16");
					return (lexer->token->kind = TOKEN_ERROR);
				}
				else {
					lexer->token->data.i = (Int32)(UInt32)value;
					lexer->token->text = text;
					return (lexer->token->kind = TOKEN_INTEGER16);
				}
			}
			else goto unknown_suffix;

		case 'x': case 'X':
			if (suffixLength == 1) {
				// Byte: the value must fit in 8 bits, so 256 itself is already too big.
				if (value >= (1ULL << 8)) {
					lexer->token->text = String_FormatString(IllegalNumericSizeMessage, "Byte");
					return (lexer->token->kind = TOKEN_ERROR);
				}
				else {
					lexer->token->data.i = (Int32)(UInt32)value;
					lexer->token->text = text;
					return (lexer->token->kind = TOKEN_BYTE);
				}
			}
			else goto unknown_suffix;

		default:
		unknown_suffix:
			lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, suffix);
			return (lexer->token->kind = TOKEN_ERROR);
	}
}