/// <summary>
/// Having seen a decimal digit on the input, parse a decimal integer, or hand the text over
/// to Lexer_ParseReal() if the digits turn out to be followed by a decimal point.
/// </summary>
/// <param name="lexer">The lexical analyzer.  The byte just before lexer->src is treated
/// as the first byte of the token.</param>
/// <param name="isFirstContentOnLine">Passed through unchanged to Lexer_ParseReal().</param>
/// <returns>The result of ProcessIntegerValue() for an integer, the result of Lexer_ParseReal()
/// for a real value, or the value of END_TOKEN(TOKEN_ERROR) if the number is malformed.</returns>
Int Lexer_ParseDigit(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *start, *digitsEnd;
	Token token = lexer->token;
	UInt64 value;
	String suffix;

	// The token begins one byte back from the current read position.
	START_TOKEN(src - 1);
	start = src - 1;

	// Decimal integer, or possibly a real value (if we find a '.').
	if (!ParseDecimalInteger(lexer, &value)) {
		lexer->token->text = IllegalDecimalIntegerMessage;
		return END_TOKEN(TOKEN_ERROR);
	}

	// ParseDecimalInteger() moved lexer->src; remember where the digits stopped.
	digitsEnd = src = lexer->src;

	if (src < lexer->end && *src == '.' && (src + 1 >= lexer->end || src[1] != '.')) {
		// Found a '.' (and it's not part of a ".."), so rewind back and re-parse this
		// as a real or float value.
		lexer->src = start;
		return Lexer_ParseReal(lexer, isFirstContentOnLine);
	}

	// Collected a whole decimal value, so finish it.
	suffix = CollectAlphanumericSuffix(lexer);
	if (!EnsureEndOfNumber(lexer)) {
		// Report the offending character the same way Lexer_ParseZero() does, so that
		// the error token never goes out without a message.
		lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
		return END_TOKEN(TOKEN_ERROR);
	}

	END_TOKEN(TOKEN_INTEGER32);
	return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
}
/// <summary> /// Having seen a slash '/' on the input, parse either a comment or a punctuation name. /// </summary> /// <param name="lexer">The lexical analyzer.</param> /// <param name="isFirstContentOnLine">Whether this identifier is the first non-whitespace non-comment content on its line.</param> /// <returns>The next token that was found in the input, or TOKEN_NONE if no token was found (i.e., a comment).</returns> Int Lexer_ParseSlash(Lexer lexer, Bool isFirstContentOnLine) { Byte nextch; const Byte *src = lexer->src; const Byte *end = lexer->end; Token token = lexer->token; Int startLine; Byte ch; switch (nextch = *src++) { // A // single-line comment. case '/': while (src < end && (ch = *src) != '\n' && ch != '\r') src++; lexer->src = src; return TOKEN_NONE; // A multi-line comment. case '*': startLine = lexer->line; for (;;) { if (src >= end) { START_TOKEN(src - 1); lexer->token->text = String_FormatString(UnterminatedCommentMessage, startLine); return END_TOKEN(TOKEN_ERROR); } ch = *src++; if (ch == '\n') { if (src < end && *src == '\r') src++; lexer->line++; } else if (ch == '\r') { if (src < end && *src == '\n') src++; lexer->line++; } if (ch == '*' && src < end && *src == '/') { src++; break; } } lexer->src = src; return TOKEN_NONE; default: // Not a comment: General punctuation. lexer->src = src - 2; return Lexer_ParsePunctuation(lexer, isFirstContentOnLine); } }
/*
 * Table-driven lexer: dispatches on lex_state and the current lookahead character.
 *
 * The control-flow macros (START_LEXER, START_TOKEN, ADVANCE, ACCEPT_TOKEN, LEX_ERROR) and
 * the `lookahead` variable are defined outside this function.  NOTE(review): the code relies
 * on ADVANCE, ACCEPT_TOKEN and LEX_ERROR each leaving the current case rather than falling
 * through to the next statement -- confirm against the macro definitions.
 *
 * States whose bodies are identical share a single body below.
 */
static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state)
{
	START_LEXER();
	switch (lex_state) {

		/* Start state: blanks loop here; line terminators, a quote, or a digit move on. */
		case 1:
			START_TOKEN();
			if (lookahead == ' ' || lookahead == '\t')
				ADVANCE(1);
			if (lookahead == '\n')
				ADVANCE(2);
			if (lookahead == '\r')
				ADVANCE(3);
			if (lookahead == '\"')
				ADVANCE(4);
			if (lookahead >= '0' && lookahead <= '9')
				ADVANCE(11);
			LEX_ERROR();

		/* A line feed has been consumed. */
		case 2:
		case 15:
			START_TOKEN();
			ACCEPT_TOKEN(anon_sym_LF);

		/* A carriage return has been consumed. */
		case 3:
		case 16:
			START_TOKEN();
			ACCEPT_TOKEN(anon_sym_CR);

		/* States 4 through 9: after an opening quote, match h, e, l, l, o, closing quote. */
		case 4:
			if (lookahead == 'h')
				ADVANCE(5);
			LEX_ERROR();

		case 5:
			if (lookahead == 'e')
				ADVANCE(6);
			LEX_ERROR();

		case 6:
			if (lookahead == 'l')
				ADVANCE(7);
			LEX_ERROR();

		case 7:
			if (lookahead == 'l')
				ADVANCE(8);
			LEX_ERROR();

		case 8:
			if (lookahead == 'o')
				ADVANCE(9);
			LEX_ERROR();

		case 9:
			if (lookahead == '\"')
				ADVANCE(10);
			LEX_ERROR();

		/* The closing quote has been consumed. */
		case 10:
			ACCEPT_TOKEN(anon_sym_DQUOTEhello_DQUOTE);

		/* A single digit has been consumed. */
		case 11:
			ACCEPT_TOKEN(aux_sym_SLASH_BSLASHd_SLASH);

		/* Only whitespace (of any kind) and then the zero character are allowed here. */
		case 12:
			START_TOKEN();
			if (lookahead == 0)
				ADVANCE(13);
			if (lookahead == ' ' || lookahead == '\t' || lookahead == '\n' || lookahead == '\r')
				ADVANCE(12);
			LEX_ERROR();

		/* The zero character has been consumed. */
		case 13:
			ACCEPT_TOKEN(ts_builtin_sym_end);

		/* Like state 1, but the zero character is also allowed; also used as the error state. */
		case 14:
		case ts_lex_state_error:
			START_TOKEN();
			if (lookahead == 0)
				ADVANCE(13);
			if (lookahead == ' ' || lookahead == '\t')
				ADVANCE(14);
			if (lookahead == '\n')
				ADVANCE(15);
			if (lookahead == '\r')
				ADVANCE(16);
			if (lookahead == '\"')
				ADVANCE(4);
			if (lookahead >= '0' && lookahead <= '9')
				ADVANCE(11);
			LEX_ERROR();

		default:
			LEX_ERROR();
	}
}
/// <summary>
/// Having seen a zero '0' on the input, parse the numeric token it begins:  a hexadecimal
/// integer ("0x" followed by hex digits), a zero carrying a bare 'x' suffix, an octal integer,
/// or (when the digits are followed by a decimal point) a real value.
/// </summary>
/// <param name="lexer">The lexical analyzer.  The byte just before lexer->src is treated
/// as the first byte of the token.</param>
/// <param name="isFirstContentOnLine">Passed through unchanged to Lexer_ParseReal().</param>
/// <returns>The result of ProcessIntegerValue() for an integer, the result of Lexer_ParseReal()
/// for a real value, or the value of END_TOKEN(TOKEN_ERROR) if the number is malformed.</returns>
Int Lexer_ParseZero(Lexer lexer, Bool isFirstContentOnLine)
{
	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start, *digitsEnd;
	Token token = lexer->token;
	Byte ch;
	UInt64 value;
	String suffix;

	// The token begins one byte back from the current read position.
	START_TOKEN(src - 1);
	start = src - 1;

	if (src >= end || ((ch = *src) != 'x' && ch != 'X')) {

		// No 'x' marker:  octal integer, or possibly a real value (if we find a '.').
		if (!ParseOctalInteger(lexer, &value)) {
			lexer->token->text = IllegalOctalIntegerMessage;
			return END_TOKEN(TOKEN_ERROR);
		}

		src = lexer->src;
		digitsEnd = src;

		if (src < lexer->end && *src == '.' && (src + 1 >= lexer->end || src[1] != '.')) {
			// A lone '.' (not the start of a "..") follows the digits, so start over
			// from the first digit and let the real-number parser handle all of it.
			lexer->src = start;
			return Lexer_ParseReal(lexer, isFirstContentOnLine);
		}

		// A complete octal value; pick up its suffix and finish the token.
		suffix = CollectAlphanumericSuffix(lexer);
		if (!EnsureEndOfNumber(lexer)) {
			lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
			return END_TOKEN(TOKEN_ERROR);
		}

		END_TOKEN(TOKEN_INTEGER32);
		return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
	}

	// Step over the 'x' or 'X'.
	src++;

	if (src < end
		&& (((ch = *src) >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))) {

		// At least one hex digit follows the 'x', so this is a hexadecimal integer.
		lexer->src = src;
		if (!ParseHexadecimalInteger(lexer, &value)) {
			lexer->token->text = IllegalHexadecimalIntegerMessage;
			return END_TOKEN(TOKEN_ERROR);
		}

		src = lexer->src;
		digitsEnd = src;

		suffix = CollectAlphanumericSuffix(lexer);
		if (!EnsureEndOfNumber(lexer)) {
			lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("%c", *lexer->src));
			return END_TOKEN(TOKEN_ERROR);
		}

		END_TOKEN(TOKEN_INTEGER32);
		return ProcessIntegerValue(lexer, value, String_Create(start, digitsEnd - start), suffix);
	}

	// No hex digit after the 'x'.  That is not an error by itself:  the 'x' is taken as
	// the suffix of a plain zero.
	lexer->src = src;
	if (!EnsureEndOfNumber(lexer)) {
		lexer->token->text = String_FormatString(IllegalNumericSuffixMessage, String_Format("x%c", ch));
		return END_TOKEN(TOKEN_ERROR);
	}

	END_TOKEN(TOKEN_INTEGER32);
	return ProcessIntegerValue(lexer, 0, ZeroString, LowercaseXString);
}
/// <summary>
/// Parse a real or float value starting at lexer->src:  optional integer digits, an optional
/// decimal point with fractional digits, and an optional alphabetic type suffix.  A single
/// ', ", or _ between two digits is accepted as a separator and dropped from the number's text.
/// </summary>
/// <param name="lexer">The lexical analyzer.  lexer->src must point at the first byte of the number.</param>
/// <param name="isFirstContentOnLine">Unused.</param>
/// <returns>The value of END_TOKEN() for the kind of token produced:  TOKEN_REAL64 for no suffix,
/// TOKEN_FLOAT64 for "f", TOKEN_REAL128 for "l", TOKEN_REAL32 for "h", TOKEN_FLOAT32 for "hf"
/// (suffix letters in either case), or TOKEN_ERROR for a malformed value or unknown suffix.</returns>
Int Lexer_ParseReal(Lexer lexer, Bool isFirstContentOnLine)
{
	DECLARE_INLINE_STRINGBUILDER(digitBuilder, 256);	// 256 is plenty for most numbers, but it can grow if necessary.

	const Byte *src = lexer->src;
	const Byte *end = lexer->end;
	const Byte *start;
	Byte ch;
	Token token = lexer->token;
	const Byte *digits;
	String digitString, suffix;
	const Byte *suffixText;
	Float64 float64;

	UNUSED(isFirstContentOnLine);

	INIT_INLINE_STRINGBUILDER(digitBuilder);

	START_TOKEN(src);

	// Collect integer digits.  'start' always marks the first byte that has not yet been
	// copied into the digitBuilder.
	start = src;
	while (src < end && (ch = *src) >= '0' && ch <= '9') {
		src++;
		if (src + 1 < end
			&& ((ch = *src) == '\'' || ch == '\"' || ch == '_')
			&& src[1] >= '0' && src[1] <= '9') {
			// A separator between two digits:  copy the run before it, then skip it.
			if (src > start) {
				StringBuilder_Append(digitBuilder, start, 0, src - start);
			}
			src++;
			start = src;
		}
	}

	// Copy into the digitBuilder whatever integers are left, and move 'start' past them
	// so the same digits are not copied a second time below.
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
	}
	start = src;

	// Collect the decimal point.  It stays in the pending run, so it is copied along
	// with the fractional digits that follow it.
	if (src < end && *src == '.') {
		src++;

		// Collect fractional digits.
		while (src < end && (ch = *src) >= '0' && ch <= '9') {
			src++;
			if (src + 1 < end
				&& ((ch = *src) == '\'' || ch == '\"' || ch == '_')
				&& src[1] >= '0' && src[1] <= '9') {
				if (src > start) {
					StringBuilder_Append(digitBuilder, start, 0, src - start);
				}
				src++;
				start = src;
			}
		}
	}

	// Finally copy into the digitBuilder whatever's left.
	if (src > start) {
		StringBuilder_Append(digitBuilder, start, 0, src - start);
	}

	lexer->src = src;

	// Make the result C-friendly.
	// NOTE(review): this NUL presumably becomes part of digitString's content, and so of
	// the token text built from it below -- confirm that is intended.
	StringBuilder_AppendByte(digitBuilder, '\0');

	// Extract out the raw text of the number.
	digitString = StringBuilder_ToString(digitBuilder);
	digits = String_GetBytes(digitString);

	// Get any trailing type identifiers.
	suffix = CollectAlphanumericSuffix(lexer);

	// And make sure the result is clean.
	if (!EnsureEndOfNumber(lexer)) {
		token->text = IllegalRealValueMessage;
		return END_TOKEN(TOKEN_ERROR);
	}

	suffixText = String_GetBytes(suffix);
	if (suffixText[0] == '\0') {
		// No suffix:  Real64.
		if (!Real64_TryParse(digitString, &token->data.real64)) {
			token->text = IllegalRealValueMessage;
			return END_TOKEN(TOKEN_ERROR);
		}
		token->text = digitString;
		return END_TOKEN(TOKEN_REAL64);
	}
	else if (suffixText[0] == 'F' || suffixText[0] == 'f') {
		if (suffixText[1] == '\0') {
			// Float64.
			float64 = strtod(digits, NULL);
			token->data.float64 = float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT64);
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'L' || suffixText[0] == 'l') {
		// 128-bit something-or-other.
		if (suffixText[1] == '\0') {
			// Real128.
			if (!Real128_TryParse(digitString, &token->data.real128)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL128);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// Float128 (not yet supported).
			goto badSuffix;
		}
		else goto badSuffix;
	}
	else if (suffixText[0] == 'H' || suffixText[0] == 'h') {
		// 32-bit something-or-other.
		if (suffixText[1] == '\0') {
			// Real32.
			if (!Real32_TryParse(digitString, &token->data.real32)) {
				token->text = IllegalRealValueMessage;
				return END_TOKEN(TOKEN_ERROR);
			}
			token->text = String_Concat(digitString, suffix);
			return END_TOKEN(TOKEN_REAL32);
		}
		else if ((suffixText[1] == 'F' || suffixText[1] == 'f') && suffixText[2] == '\0') {
			// Float32:  parsed at double precision, then narrowed.
			float64 = strtod(digits, NULL);
			token->data.float32 = (Float32)float64;
			token->text = digitString;
			return END_TOKEN(TOKEN_FLOAT32);
		}
		else goto badSuffix;
	}
	else goto badSuffix;

badSuffix:
	token->text = String_FormatString(IllegalNumericSuffixMessage, suffix);
	return END_TOKEN(TOKEN_ERROR);
}