// Process a triple quoted string, the leading """ of which has been seen, but // not consumed static token_t* triple_string(lexer_t* lexer) { consume_chars(lexer, 3); // Leading """ while(true) { if(is_eof(lexer)) return literal_doesnt_terminate(lexer); char c = look(lexer); if((c == '\"') && (lookn(lexer, 2) == '\"') && (lookn(lexer, 3) == '\"')) { consume_chars(lexer, 3); // Triple strings can end with 3 or more "s. If there are more than 3 // the extra ones are part of the string contents while(look(lexer) == '\"') { append_to_token(lexer, '\"'); consume_chars(lexer, 1); } normalise_string(lexer); return make_token_with_text(lexer, TK_STRING); } consume_chars(lexer, 1); append_to_token(lexer, c); } }
// Process a block comment the leading / * for which has been seen, but not // consumed static token_t* nested_comment(lexer_t* lexer) { consume_chars(lexer, 2); // Leading / * size_t depth = 1; while(depth > 0) { if(lexer->len <= 1) { lex_error(lexer, "Nested comment doesn't terminate"); lexer->ptr += lexer->len; lexer->len = 0; return make_token(lexer, TK_LEX_ERROR); } if(look(lexer) == '*' && lookn(lexer, 2) == '/') { consume_chars(lexer, 2); depth--; } else if(look(lexer) == '/' && lookn(lexer, 2) == '*') { consume_chars(lexer, 2); depth++; } else { consume_chars(lexer, 1); } } lexer->newline = false; return NULL; }
// Process a slash, which has been seen, but not consumed static token_t* slash(lexer_t* lexer) { if(lookn(lexer, 2) == '*') return nested_comment(lexer); if(lookn(lexer, 2) == '/') return line_comment(lexer); consume_chars(lexer, 1); return make_token(lexer, TK_DIVIDE); }
// Process a triple quoted string, the leading """ of which has been seen, but // not consumed static token_t* triple_string(lexer_t* lexer) { consume_chars(lexer, 3); // Leading """ while(true) { if(is_eof(lexer)) return literal_doesnt_terminate(lexer); char c = look(lexer); if((c == '\"') && (lookn(lexer, 2) == '\"') && (lookn(lexer, 3) == '\"')) { consume_chars(lexer, 3); normalise_string(lexer); return make_token_with_text(lexer, TK_STRING); } consume_chars(lexer, 1); append_to_token(lexer, c); } }
// Process a string or character escape sequence, the leading \ of which has // been seen but not consumed. // Errors are reported at the start of the sequence (ie the \ ). // Returns the escape value or <0 on error. static int escape(lexer_t* lexer, bool unicode_allowed) { // Record the start position of the escape sequence for error reporting const char* start = &lexer->source->m[lexer->ptr]; size_t line = lexer->line; size_t pos = lexer->pos; char c = lookn(lexer, 2); consume_chars(lexer, 2); int value = -2; // Default is 2 bad characters, \ and whatever follows it int hex_digits = 0; switch(c) { case 'a': value = 0x07; break; case 'b': value = 0x08; break; case 'e': value = 0x1B; break; case 'f': value = 0x0C; break; case 'n': value = 0x0A; break; case 'r': value = 0x0D; break; case 't': value = 0x09; break; case 'v': value = 0x0B; break; case '\"': value = 0x22; break; case '\'': value = 0x27; break; case '\\': value = 0x5C; break; case '0': value = 0x00; break; case 'x': hex_digits = 2; break; case 'u': if(unicode_allowed) hex_digits = 4; break; case 'U': if(unicode_allowed) hex_digits = 6; break; } if(hex_digits > 0) { value = read_hex_escape(lexer, hex_digits); if(value < 0) { lex_error_at(lexer, line, pos, "Invalid escape sequence \"%.*s\", %d hex digits required", -value, start, hex_digits); return -1; } if(value > 0x10FFFF) { lex_error_at(lexer, line, pos, "Escape sequence \"%8s\" exceeds unicode range (0x10FFFF)", start); return -1; } } if(value < 0) { lex_error_at(lexer, line, pos, "Invalid escape sequence \"%.*s\"", -value, start); return -1; } return value; }