Beispiel #1
0
// Lex a triple quoted string literal. On entry the opening """ has been
// recognised but is still unconsumed.
// Returns the result of literal_doesnt_terminate() if the input ends before a
// closing """ is found, otherwise a TK_STRING token built from the collected
// text.
static token_t* triple_string(lexer_t* lexer)
{
  consume_chars(lexer, 3);  // Skip the opening """

  for(;;)
  {
    if(is_eof(lexer))
      return literal_doesnt_terminate(lexer);

    char ch = look(lexer);

    bool at_close = (ch == '\"') &&
      (lookn(lexer, 2) == '\"') &&
      (lookn(lexer, 3) == '\"');

    if(!at_close)
    {
      // Ordinary content character: step past it and keep it in the token
      consume_chars(lexer, 1);
      append_to_token(lexer, ch);
      continue;
    }

    consume_chars(lexer, 3);  // The closing """

    // A run of more than 3 quotes is allowed at the end of the literal; every
    // quote beyond the third belongs to the string contents
    for(; look(lexer) == '\"'; consume_chars(lexer, 1))
      append_to_token(lexer, '\"');

    normalise_string(lexer);
    return make_token_with_text(lexer, TK_STRING);
  }
}
Beispiel #2
0
// Skip over a (possibly nested) block comment. On entry the opening / * has
// been recognised but is still unconsumed.
// Returns NULL once the outermost comment has been closed, or a TK_LEX_ERROR
// token if the input runs out first.
static token_t* nested_comment(lexer_t* lexer)
{
  size_t nesting = 1;  // Number of comments currently open
  consume_chars(lexer, 2); // Skip the opening / *

  do
  {
    // A delimiter is two characters long, so with fewer than two left the
    // comment can never be closed
    if(lexer->len <= 1)
    {
      lex_error(lexer, "Nested comment doesn't terminate");

      // Discard whatever input remains
      lexer->ptr += lexer->len;
      lexer->len = 0;
      return make_token(lexer, TK_LEX_ERROR);
    }

    if((look(lexer) == '*') && (lookn(lexer, 2) == '/'))
    {
      // Closing delimiter
      consume_chars(lexer, 2);
      nesting--;
      continue;
    }

    if((look(lexer) == '/') && (lookn(lexer, 2) == '*'))
    {
      // Opening delimiter of an inner comment
      consume_chars(lexer, 2);
      nesting++;
      continue;
    }

    // Any other character is comment text
    consume_chars(lexer, 1);
  } while(nesting > 0);

  lexer->newline = false;
  return NULL;
}
Beispiel #3
0
// Handle a '/' that has been recognised but is still unconsumed. The
// character after it decides whether this starts a block comment, a line
// comment, or is a plain divide operator.
static token_t* slash(lexer_t* lexer)
{
  switch(lookn(lexer, 2))
  {
    case '*':
      // Block comment; nested_comment() consumes the delimiter itself
      return nested_comment(lexer);

    case '/':
      // Line comment; line_comment() consumes the delimiter itself
      return line_comment(lexer);

    default:
      break;
  }

  // A lone slash
  consume_chars(lexer, 1);
  return make_token(lexer, TK_DIVIDE);
}
Beispiel #4
0
// Lex a triple quoted string literal. On entry the opening """ has been
// recognised but is still unconsumed.
// The literal ends at the first """ encountered. Returns a TK_STRING token
// built from the collected text, or the result of literal_doesnt_terminate()
// if the input ends first.
static token_t* triple_string(lexer_t* lexer)
{
  consume_chars(lexer, 3);  // Skip the opening """

  while(!is_eof(lexer))
  {
    char current = look(lexer);

    if((current == '\"') &&
      (lookn(lexer, 2) == '\"') &&
      (lookn(lexer, 3) == '\"'))
    {
      // Closing """ found
      consume_chars(lexer, 3);
      normalise_string(lexer);
      return make_token_with_text(lexer, TK_STRING);
    }

    // Ordinary content character: step past it and keep it in the token
    consume_chars(lexer, 1);
    append_to_token(lexer, current);
  }

  // Ran out of input before the closing """
  return literal_doesnt_terminate(lexer);
}
Beispiel #5
0
// Process a string or character escape sequence, the leading \ of which has
// been seen but not consumed.
// The \ and the character following it are always consumed; for \x, \u and \U
// the hex digits are then read with read_hex_escape().
// unicode_allowed enables the \u (4 hex digit) and \U (6 hex digit) forms;
// when it is false they are reported as invalid escapes.
// Errors are reported at the start of the sequence (ie the \ ).
// Returns the escape value (>= 0) on success or -1 on error.
static int escape(lexer_t* lexer, bool unicode_allowed)
{
  // Record the start position of the escape sequence for error reporting
  const char* start = &lexer->source->m[lexer->ptr];
  size_t line = lexer->line;
  size_t pos = lexer->pos;

  char c = lookn(lexer, 2);
  consume_chars(lexer, 2);

  // A negative value marks an error; its magnitude is used below as the
  // number of characters of the bad sequence to show in the message.
  int value = -2; // Default is 2 bad characters, \ and whatever follows it
  int hex_digits = 0;

  switch(c)
  {
    case 'a':  value = 0x07; break;
    case 'b':  value = 0x08; break;
    case 'e':  value = 0x1B; break;
    case 'f':  value = 0x0C; break;
    case 'n':  value = 0x0A; break;
    case 'r':  value = 0x0D; break;
    case 't':  value = 0x09; break;
    case 'v':  value = 0x0B; break;
    case '\"': value = 0x22; break;
    case '\'': value = 0x27; break;
    case '\\': value = 0x5C; break;
    case '0':  value = 0x00; break;
    case 'x': hex_digits = 2; break;

    case 'u':
      if(unicode_allowed)
        hex_digits = 4;
      break;

    case 'U':
      if(unicode_allowed)
        hex_digits = 6;
      break;

    default:
      // Unknown escape character, value keeps its error default
      break;
  }

  if(hex_digits > 0)
  {
    value = read_hex_escape(lexer, hex_digits);

    if(value < 0)
    {
      // NOTE(review): -value is presumably the length of the malformed
      // sequence as counted by read_hex_escape() - confirm against it
      lex_error_at(lexer, line, pos,
        "Invalid escape sequence \"%.*s\", %d hex digits required",
        -value, start, hex_digits);
      return -1;
    }

    if(value > 0x10FFFF)
    {
      // Bound the printed text with a precision. A bare field width ("%8s")
      // is only a minimum and would print everything from start up to the
      // next NUL. The sequence is the \, the letter and the hex digits.
      lex_error_at(lexer, line, pos,
        "Escape sequence \"%.*s\" exceeds unicode range (0x10FFFF)",
        hex_digits + 2, start);
      return -1;
    }
  }

  if(value < 0)
  {
    lex_error_at(lexer, line, pos, "Invalid escape sequence \"%.*s\"",
      -value, start);

    return -1;
  }

  return value;
}