/// <summary>
/// Verify that the input following a just-lexed number does not immediately continue
/// with a letter-like character (which would make it malformed, e.g. "123abc").
/// </summary>
/// <param name="lexer">The lexical analyzer; on failure, lexer->src is advanced past the
/// offending character and lexer->token->text is set to a trailing-garbage error message.</param>
/// <returns>True if the number ends cleanly (or we're at end-of-input); False if a
/// letterform immediately follows it.</returns>
static Bool EnsureEndOfNumber(Lexer lexer)
{
	StringBuilder charBuilder;
	const Byte *ptr = lexer->src;
	const Byte *end = lexer->end;
	Byte firstByte;
	UInt kind;
	Int32 codePoint;

	// Nothing after the number: trivially clean.
	if (ptr >= end)
		return True;

	// Decode the next Unicode code point (fast path for ASCII).
	firstByte = *ptr++;
	if (firstByte < 128) {
		codePoint = firstByte;
	}
	else {
		ptr--;
		codePoint = String_ExtractUnicodeCharacterInternal(&ptr, end);
	}

	// Anything that could start or continue an identifier is trailing garbage.
	kind = SmileIdentifierKind(codePoint);
	if (!(kind & (IDENTKIND_STARTLETTER | IDENTKIND_MIDDLELETTER)))
		return True;

	// Build an error message showing the offending character.
	charBuilder = StringBuilder_Create();
	StringBuilder_AppendUnicode(charBuilder, codePoint);
	lexer->src = ptr;
	lexer->token->text = String_FormatString(TrailingGarbageAfterNumberMessage, StringBuilder_ToString(charBuilder));
	return False;
}
/// <summary> /// Having seen a slash '/' on the input, parse either a comment or a punctuation name. /// </summary> /// <param name="lexer">The lexical analyzer.</param> /// <param name="isFirstContentOnLine">Whether this identifier is the first non-whitespace non-comment content on its line.</param> /// <returns>The next token that was found in the input, or TOKEN_NONE if no token was found (i.e., a comment).</returns> Int Lexer_ParseSlash(Lexer lexer, Bool isFirstContentOnLine) { Byte nextch; const Byte *src = lexer->src; const Byte *end = lexer->end; Token token = lexer->token; Int startLine; Byte ch; switch (nextch = *src++) { // A // single-line comment. case '/': while (src < end && (ch = *src) != '\n' && ch != '\r') src++; lexer->src = src; return TOKEN_NONE; // A multi-line comment. case '*': startLine = lexer->line; for (;;) { if (src >= end) { START_TOKEN(src - 1); lexer->token->text = String_FormatString(UnterminatedCommentMessage, startLine); return END_TOKEN(TOKEN_ERROR); } ch = *src++; if (ch == '\n') { if (src < end && *src == '\r') src++; lexer->line++; } else if (ch == '\r') { if (src < end && *src == '\n') src++; lexer->line++; } if (ch == '*' && src < end && *src == '/') { src++; break; } } lexer->src = src; return TOKEN_NONE; default: // Not a comment: General punctuation. lexer->src = src - 2; return Lexer_ParsePunctuation(lexer, isFirstContentOnLine); } }
/// <summary>
/// Parse a real (decimal) number, optionally with a fractional part and a type suffix
/// ('F'/'f' = Float64, 'L'/'l' = Real128, 'H'/'h' = Real32, "HF"/"hf" = Float32, none = Real64).
/// Digit-group separators (apostrophe, double-quote, underscore) between digits are
/// stripped before the value is parsed.
/// </summary>
/// <param name="lexer">The lexical analyzer.</param>
/// <param name="isFirstContentOnLine">Unused here.</param>
/// <returns>The kind of token lexed (TOKEN_REAL32/64/128, TOKEN_FLOAT32/64, or TOKEN_ERROR).</returns>
Int Lexer_ParseReal(Lexer lexer, Bool isFirstContentOnLine)
{
	DECLARE_INLINE_STRINGBUILDER(digitBuilder, 256);	// 256 is plenty for most numbers, but it can grow if necessary.
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start;
	Byte ch;
	Token token = lexer->token;
	Int integerDigitCount = 0;
	Int fractionalDigitCount = 0;
	const Byte *digits;
	String digitString, suffix;
	const Byte *suffixText;
	Float64 float64;

	UNUSED(isFirstContentOnLine);

	INIT_INLINE_STRINGBUILDER(digitBuilder);
	START_TOKEN(src);

	// Collect integer digits, flushing runs into digitBuilder whenever a
	// digit-group separator (which must sit between two digits) is skipped.
	start = src;
	while (src < end && (ch = *src) >= '0' && ch <= '9') {
		src++;
		if (src + 1 < end && ((ch = *src) == '\'' || ch == '\"' || ch == '_')
			&& src[1] >= '0' && src[1] <= '9') {
			if (src > start) {
				StringBuilder_Append(digitBuilder, start, 0, src - start);
			}
			src++;
			start = src;
		}
	}

	// Copy into the digitBuilder whatever integers are left.
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
		// BUGFIX: The original never reset 'start' here, so the integer digits were
		// appended a second time by the flushes below (e.g. "1.5" produced "11.5").
		start = src;
	}
	integerDigitCount = StringBuilder_GetLength(digitBuilder);

	// Collect the decimal point.
	if (src < end && *src == '.') {
		src++;	// The '.' stays inside the start..src span so it reaches digitString.

		// Collect fractional digits, with the same separator-flushing logic.
		while (src < end && (ch = *src) >= '0' && ch <= '9') {
			src++;
			if (src + 1 < end && ((ch = *src) == '\'' || ch == '\"' || ch == '_')
				&& src[1] >= '0' && src[1] <= '9') {
				if (src > start) {
					StringBuilder_Append(digitBuilder, start, 0, src - start);
				}
				src++;
				start = src;
			}
		}

		// BUGFIX: Flush the trailing '.'-plus-fraction run *before* measuring; the
		// original measured first, making fractionalDigitCount wrong (often negative).
		if (src > start) {
			StringBuilder_Append(digitBuilder, start, 0, src - start);
			start = src;
		}

		// The "- 1" accounts for the '.' character now held in the builder.
		fractionalDigitCount = StringBuilder_GetLength(digitBuilder) - 1 - integerDigitCount;
	}

	// Finally copy into the digitBuilder whatever's left (a no-op if everything
	// was already flushed above).
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
	}

	lexer->src = src;

	// Make the result C-friendly.
	StringBuilder_AppendByte(digitBuilder, '\0');

	// Extract out the raw text of the number.
	digitString = StringBuilder_ToString(digitBuilder);
	digits = String_GetBytes(digitString);

	// Get any trailing type identifiers.
	suffix = CollectAlphanumericSuffix(lexer);

	// And make sure the result is clean.
	// NOTE(review): This replaces the detailed message EnsureEndOfNumber() just set
	// with a generic one — confirm that's intentional.
	if (!EnsureEndOfNumber(lexer)) {
		token->text = IllegalRealValueMessage;
		return END_TOKEN(TOKEN_ERROR);
	}

	suffixText = String_GetBytes(suffix);

	if (suffixText[0] == '\0') {
		// No suffix: Real64.
		if (!Real64_TryParse(digitString, &token->data.real64)) {
			token->text = IllegalRealValueMessage;
			return END_TOKEN(TOKEN_ERROR);
		}
		token->text = digitString;
		return END_TOKEN(TOKEN_REAL64);
	}
	else if (suffixText[0] == 'F' || suffixText[0] == 'f') {
		if (suffixText[1] == '\0') {
			// Float64.
			float64 = strtod((const char *)digits, NULL);
			token->data.float64 = float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT64);
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'L' || suffixText[0] == 'l') {
		// 128-bit something-or-other.
		if (suffixText[1] == '\0') {
			// Real128.
			if (!Real128_TryParse(digitString, &token->data.real128)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL128);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// Float128 (not yet supported).
			goto badSuffix;
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'H' || suffixText[0] == 'h') {
		// 32-bit something-or-other.
		if (suffixText[1] == '\0') {
			// Real32.
			if (!Real32_TryParse(digitString, &token->data.real32)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL32);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// Float32.
			float64 = strtod((const char *)digits, NULL);
			token->data.float32 = (Float32)float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT32);
		}
		else goto badSuffix;
	}
	else goto badSuffix;

badSuffix:
	token->text = String_FormatString(IllegalNumericSuffixMessage, suffix);
	return END_TOKEN(TOKEN_ERROR);
}
/// <summary>
/// Having consumed a leading '0', parse the rest of the number: "0x..." hexadecimal,
/// "0..." octal, a lone zero byte ("0x" with no hex digits), or — if a '.' (not "..")
/// follows the octal digits — rewind and re-lex the whole thing as a real value.
/// </summary>
/// <param name="lexer">The lexical analyzer; lexer->src points just past the '0'.</param>
/// <param name="isFirstContentOnLine">Passed through to Lexer_ParseReal when rewinding.</param>
/// <returns>The kind of token lexed (an integer/real token kind, or TOKEN_ERROR).</returns>
Int Lexer_ParseZero(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start, *digitsEnd;
	Token token = lexer->token;	// Used by the START_TOKEN/END_TOKEN macros.
	Byte ch;
	UInt64 value;
	String suffix;

	START_TOKEN(src - 1);
	start = src - 1;	// Points at the '0' itself, so the token text includes it.

	if (src < end && ((ch = *src) == 'x' || ch == 'X')) {
		// Hexadecimal integer, or possibly a zero byte.
		src++;
		if (src >= end || !(((ch = *src) >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))) {
			// Not an error; this is decimal zero, as a byte ("0x" with no digits).
			lexer->src = src;
			if (!EnsureEndOfNumber(lexer)) {
				// NOTE(review): 'ch' here is the garbage character only when src < end;
				// otherwise it still holds the 'x'/'X' — but EnsureEndOfNumber cannot
				// fail at end-of-input, so that stale value is never reported.
				lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("x%c", ch));
				return END_TOKEN(TOKEN_ERROR);
			}
			// END_TOKEN fills in position info; ProcessIntegerValue then sets the
			// final token kind/value from the suffix.
			END_TOKEN(TOKEN_INTEGER32);
			return ProcessIntegerValue(lexer, 0, ZeroString, LowercaseXString);
		}
		else {
			// This is a hexadecimal integer.
			lexer->src = src;
			if (!ParseHexadecimalInteger(lexer, &value)) {
				lexer->token->text = IllegalHexadecimalIntegerMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			digitsEnd = src = lexer->src;	// Remember where the digits ended, before the suffix.
			suffix = CollectAlphanumericSuffix(lexer);
			if (!EnsureEndOfNumber(lexer)) {
				// NOTE(review): EnsureEndOfNumber advances lexer->src past the bad
				// character, so *lexer->src here is the byte AFTER it — confirm that
				// (and the end-of-buffer case) is what the message should show.
				lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
				return END_TOKEN(TOKEN_ERROR);
			}
			END_TOKEN(TOKEN_INTEGER32);
			return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
		}
	}
	else {
		// Octal integer, or possibly a real value (if we find a '.').
		if (!ParseOctalInteger(lexer, &value)) {
			lexer->token->text = IllegalOctalIntegerMessage;
			return END_TOKEN(TOKEN_ERROR);
		}
		digitsEnd = src = lexer->src;

		if (src < lexer->end && *src == '.' && (src+1 >= lexer->end || src[1] != '.')) {
			// Found a '.' (and it's not part of a ".."), so rewind back and re-parse this as a real or float value.
			lexer->src = start;
			return Lexer_ParseReal(lexer, isFirstContentOnLine);
		}
		else {
			// Collected a whole octal value, so finish it.
			suffix = CollectAlphanumericSuffix(lexer);
			if (!EnsureEndOfNumber(lexer)) {
				// NOTE(review): same *lexer->src caveat as in the hexadecimal branch above.
				lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
				return END_TOKEN(TOKEN_ERROR);
			}
			END_TOKEN(TOKEN_INTEGER32);
			return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
		}
	}
}
/// <summary>
/// Given a parsed unsigned integer value, its source text, and any collected type
/// suffix, range-check the value, fill in the lexer's current token, and return the
/// token kind: no suffix = Integer32, 'L'/'l' = Integer64, 'H'/'h' = Integer16,
/// 'X'/'x' = Byte. Any other suffix is an error.
/// </summary>
/// <param name="lexer">The lexical analyzer whose token is filled in.</param>
/// <param name="value">The parsed (unsigned) numeric value.</param>
/// <param name="text">The source text of the number, stored as the token's text.</param>
/// <param name="suffix">The trailing alphanumeric suffix (may be null/empty).</param>
/// <returns>The token kind assigned (also stored in lexer->token->kind).</returns>
Inline Int ProcessIntegerValue(Lexer lexer, UInt64 value, String text, String suffix)
{
	const Byte *suffixText = String_GetBytes(suffix);
	Int suffixLength = String_Length(suffix);

	if (String_IsNullOrEmpty(suffix)) {
		// No suffix: default to Integer32, rejecting anything over 32 bits.
		if (value >= (1ULL << 32)) {
			lexer->token->text = String_FormatString(IllegalNumericSizeMessage, "Integer32");
			return (lexer->token->kind = TOKEN_ERROR);
		}
		else {
			lexer->token->data.i = (Int32)(UInt32)value;
			lexer->token->text = text;
			return (lexer->token->kind = TOKEN_INTEGER32);
		}
	}

	switch (suffixText[0]) {

		case 'l': case 'L':
			// 'L' suffix: Integer64.  No range check needed; the value is already 64-bit.
			if (suffixLength == 1) {
				lexer->token->data.int64 = (Int64)value;
				lexer->token->text = text;
				return (lexer->token->kind = TOKEN_INTEGER64);
			}
			else goto unknown_suffix;

		case 'h': case 'H':
			// 'H' suffix: Integer16.
			// BUGFIX: was 'value > 65536', which wrongly accepted 65536 (it doesn't fit
			// in 16 bits and would silently truncate to 0).  Mirrors the unsigned-range
			// check used for Integer32 above.
			if (suffixLength == 1) {
				if (value > 65535) {
					lexer->token->text = String_FormatString(IllegalNumericSizeMessage, "Integer16");
					return (lexer->token->kind = TOKEN_ERROR);
				}
				else {
					lexer->token->data.i = (Int32)(UInt32)value;
					lexer->token->text = text;
					return (lexer->token->kind = TOKEN_INTEGER16);
				}
			}
			else goto unknown_suffix;

		case 'x': case 'X':
			// 'X' suffix: Byte.
			// BUGFIX: was 'value > 256', which wrongly accepted 256 (it doesn't fit in
			// 8 bits and would silently truncate to 0).
			if (suffixLength == 1) {
				if (value > 255) {
					lexer->token->text = String_FormatString(IllegalNumericSizeMessage, "Byte");
					return (lexer->token->kind = TOKEN_ERROR);
				}
				else {
					lexer->token->data.i = (Int32)(UInt32)value;
					lexer->token->text = text;
					return (lexer->token->kind = TOKEN_BYTE);
				}
			}
			else goto unknown_suffix;

		default:
		unknown_suffix:
			lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, suffix);
			return (lexer->token->kind = TOKEN_ERROR);
	}
}