Exemple #1
0
// Lex a triple quoted string. On entry the opening """ has been recognised but
// is still in the input; it is consumed here. Returns a TK_STRING token built
// from the accumulated text, or the result of literal_doesnt_terminate() when
// the input ends before a closing """ is found.
static token_t* triple_string(lexer_t* lexer)
{
  // Skip the opening delimiter
  consume_chars(lexer, 3);

  while(!is_eof(lexer))
  {
    char ch = look(lexer);
    bool at_close =
      (ch == '\"') && (lookn(lexer, 2) == '\"') && (lookn(lexer, 3) == '\"');

    if(!at_close)
    {
      // Ordinary content: move past it and keep it as part of the text
      consume_chars(lexer, 1);
      append_to_token(lexer, ch);
      continue;
    }

    // Closing delimiter
    consume_chars(lexer, 3);

    // A run of more than 3 quotes closes the string as well; every quote
    // beyond the third belongs to the string contents
    for(; look(lexer) == '\"'; consume_chars(lexer, 1))
      append_to_token(lexer, '\"');

    normalise_string(lexer);
    return make_token_with_text(lexer, TK_STRING);
  }

  // Ran out of input before the closing delimiter
  return literal_doesnt_terminate(lexer);
}
Exemple #2
0
// Append the given value to the current token text, UTF-8 encoded
// The value is asserted to lie in the range 0 to 0x10FFFF inclusive.
// NOTE(review): only the 1 and 2 byte branches are visible in this excerpt;
// the rest of the function is not shown here.
static void append_utf8(lexer_t* lexer, int value)
{
  assert(value >= 0 && value <= 0x10FFFF);

  if(value <= 0x7F)
  {
    // Values up to 0x7F are appended as a single byte
    append_to_token(lexer, (char)(value & 0x7F));
  }
  else if(value <= 0x7FF)
  {
    // Two bytes: 0xC0 combined with the bits above the low 6, then 0x80
    // combined with the low 6 bits
    append_to_token(lexer, (char)(0xC0 | (value >> 6)));
    append_to_token(lexer, (char)(0x80 | (value & 0x3F)));
  }
Exemple #3
0
// Create a token with the given ID and attach the text accumulated in the
// lexer buffer to it. The token is created first; the buffer is then NUL
// terminated and passed through stringtab() to obtain the token's string.
static token_t* make_token_with_text(lexer_t* lexer, token_id id)
{
  token_t* token = make_token(lexer, id);

  // stringtab() is handed the raw buffer, so terminate it beforehand
  append_to_token(lexer, '\0');
  token_set_string(token, stringtab(lexer->buffer));

  return token;
}
Exemple #4
0
/*
 * Feed one received character into the NMEA sentence parser.
 *
 * Framing characters ('$', ',', '*', '\r', '\n') drive the sentence and
 * token callbacks; every other character is appended to the current token.
 * While the checksum state is CS_CALC, each character except '$' is also
 * passed to add_to_checksum().
 */
void nmea_process_character(char c) {
    if (c == '$') {
        /* a new sentence begins: restart the running checksum */
        sentence_started();
        checksum = 0;
        checksum_state = CS_CALC;
    } else if (c == ',') {
        /* field separator */
        token_finished();
    } else if (c == '*') {
        /* end of the data part; what follows is the checksum itself */
        token_finished();
        checksum_state = CS_READ;
    } else if (c == '\r') {
        /* ignored, the '\n' that follows ends the sentence */
    } else if (c == '\n') {
        token_finished();
        ATOMIC(ATOMIC_FORCEON) {
            sentence_finished();
        }
        checksum_state = CS_UNKNOWN;
    } else {
        append_to_token(c);
    }

    /* evaluated after the state changes above, so '*' and '\n' are only
       summed if the state is still CS_CALC afterwards */
    if (checksum_state == CS_CALC && c != '$') {
        add_to_checksum(c);
    }
}
Exemple #5
0
// Lex a triple quoted string whose opening """ has been detected but is still
// waiting in the input. Returns a TK_STRING token holding the normalised text,
// or the result of literal_doesnt_terminate() if the input ends first.
static token_t* triple_string(lexer_t* lexer)
{
  // Drop the opening delimiter
  consume_chars(lexer, 3);

  for(;;)
  {
    if(is_eof(lexer))
      break;

    char current = look(lexer);

    // Only peek further ahead once the first quote has been seen
    if(current == '\"')
    {
      if((lookn(lexer, 2) == '\"') && (lookn(lexer, 3) == '\"'))
      {
        // Closing delimiter: the string is complete
        consume_chars(lexer, 3);
        normalise_string(lexer);
        return make_token_with_text(lexer, TK_STRING);
      }
    }

    // Anything else is string content
    consume_chars(lexer, 1);
    append_to_token(lexer, current);
  }

  // End of input reached without a closing delimiter
  return literal_doesnt_terminate(lexer);
}
Exemple #6
0
/*
   Parse a field into tokens as defined by rfc822.

   HDR is the first line of the field; the following lines are treated
   as part of the same field for as long as hdr->next->cont is set.

   Returns the head of a token list linked through ->next.  NULL is
   returned when HDR is NULL, when the first line contains no colon,
   when the field name is empty, when the field body yields no tokens,
   or when new_token/append_to_token returns NULL.  errno is cleared on
   entry; on the allocation failure path the tokens collected so far
   are released and the errno value seen at that point is restored.
 */
static TOKEN
parse_field (HDR_LINE hdr)
{
  /* DELIMITERS ends an atom; DELIMITERS2 lists the characters that are
     emitted as single character tSPECIAL tokens.  The "t" variants are
     used for the header names listed in tspecial_header below.  */
  static const char specials[] = "<>@.,;:\\[]\"()";
  static const char specials2[] = "<>@.,;:";
  static const char tspecials[] = "/?=<>@,;:\\[]\"()";
  static const char tspecials2[] = "/?=<>@.,;:";  /* FIXME: really
                                                     include '.'?*/
  /* Header names (with their lengths) which select the tspecials
     delimiter sets.  */
  static struct
  {
    const unsigned char *name;
    size_t namelen;
  } tspecial_header[] = {
    { "Content-Type", 12},
    { "Content-Transfer-Encoding", 25},
    { "Content-Disposition", 19},
    { NULL, 0}
  };
  const char *delimiters;
  const char *delimiters2;
  const unsigned char *line, *s, *s2;
  size_t n;
  int i, invalid = 0;  /* INVALID: a tSPACE token was already emitted for
                          the current run of invalid characters.  */
  TOKEN t, tok, *tok_tail;

  errno = 0;
  if (!hdr)
    return NULL;

  /* TOK is the list head; TOK_TAIL always points at the link where the
     next token has to be stored.  */
  tok = NULL;
  tok_tail = &tok;

  line = hdr->line;
  if (!(s = strchr (line, ':')))
    return NULL; /* oops */

  /* N is the length of the field name in front of the colon.  */
  n = s - line;
  if (!n)
    return NULL; /* oops: invalid name */

  /* Pick the delimiter sets.  The name is compared with memcmp, i.e.
     byte for byte, against the table above.  */
  delimiters = specials;
  delimiters2 = specials2;
  for (i = 0; tspecial_header[i].name; i++)
    {
      if (n == tspecial_header[i].namelen
	  && !memcmp (line, tspecial_header[i].name, n))
	{
	  delimiters = tspecials;
	  delimiters2 = tspecials2;
	  break;
	}
    }

  s++; /* Move over the colon. */
  for (;;)
    {
      /* At the end of a line: continue with the continuation line if
         there is one.  The loop also steps over empty continuation
         lines, so *S is non-zero below.  */
      while (!*s)
	{
	  if (!hdr->next || !hdr->next->cont)
            return tok; /* Ready.  */

          /* Next item is a header continuation line.  */
	  hdr = hdr->next;
	  s = hdr->line;
	}

      if (*s == '(')
	{
	  /* A comment.  It is skipped without emitting a token.  LEVEL
	     counts nested parentheses; parentheses inside a quoted
	     string within the comment are not counted.  */
	  int level = 1;
	  int in_quote = 0;

	  invalid = 0;
	  for (s++;; s++)
	    {
	      while (!*s)
		{
		  if (!hdr->next || !hdr->next->cont)
		    goto oparen_out;
                  /* Next item is a header continuation line.  */
		  hdr = hdr->next;
		  s = hdr->line;
		}

	      if (in_quote)
		{
		  if (*s == '\"')
		    in_quote = 0;
		  else if (*s == '\\' && s[1])	/* what about continuation? */
		    s++; /* Skip the escaped character.  */
		}
	      else if (*s == ')')
		{
		  if (!--level)
		    break; /* The outermost comment is closed.  */
		}
	      else if (*s == '(')
		level++;
	      else if (*s == '\"')
		in_quote = 1;
	    }
        oparen_out:
	  /* *S is zero here if the field ended inside the comment;
	     otherwise S points at the closing parenthesis.  */
	  if (!*s)
	    ; /* Actually this is an error, but we don't care about it. */
	  else
	    s++;
	}
      else if (*s == '\"' || *s == '[')
	{
	  /* A quoted string or a domain literal.  One token is built
	     from the text up to the terminator; text on continuation
	     lines is appended to the same token.  */
	  /* We do not check for non-allowed nesting of domainliterals */
	  int term = *s == '\"' ? '\"' : ']';
	  invalid = 0;
	  s++;
	  t = NULL;

	  for (;;)
	    {
	      /* Find the terminator or the end of this line.  A
	         backslash makes the scan step over the next character,
	         so that an escaped terminator does not end the token;
	         the backslash itself stays in the copied range.  */
	      for (s2 = s; *s2; s2++)
		{
		  if (*s2 == term)
		    break;
		  else if (*s2 == '\\' && s2[1]) /* what about continuation? */
		    s2++;
		}

	      /* First chunk creates the token, later chunks extend it.  */
	      t = (t
                   ? append_to_token (t, s, s2 - s)
                   : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
              if (!t)
                goto failure;

	      /* Stop if the terminator was found or if there is no
	         continuation line to go on with.  */
	      if (*s2 || !hdr->next || !hdr->next->cont)
		break;
              /* Next item is a header continuation line.  */
	      hdr = hdr->next;
	      s = hdr->line;
	    }
	  *tok_tail = t;
	  tok_tail = &t->next;
	  s = s2;
	  if (*s)
	    s++; /* skip the delimiter */
	}
      else if ((s2 = strchr (delimiters2, *s)))
	{ /* Special characters which are not handled above. */
	  invalid = 0;
	  t = new_token (tSPECIAL, s, 1);
          if (!t)
            goto failure;
	  *tok_tail = t;
	  tok_tail = &t->next;
	  s++;
	}
      else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
	{
	  /* White space produces no token.  NOTE(review): skip_ws is
	     presumably returning the first non white space position;
	     its definition is not visible here.  */
	  invalid = 0;
	  s = skip_ws (s + 1);
	}
      else if (*s > 0x20 && !(*s & 128))
	{ /* Atom. */
	  /* An atom extends over characters above 0x20 without the high
	     bit set, up to the first character listed in DELIMITERS.  */
	  invalid = 0;
	  for (s2 = s + 1; *s2 > 0x20
	       && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
	    ;
	  t = new_token (tATOM, s, s2 - s);
          if (!t)
            goto failure;
	  *tok_tail = t;
	  tok_tail = &t->next;
	  s = s2;
	}
      else
	{ /* Invalid character. */
	  /* A run of invalid characters yields a single tSPACE token;
	     INVALID is reset by every other branch.  */
	  if (!invalid)
	    { /* For parsing we assume only one space. */
	      t = new_token (tSPACE, NULL, 0);
              if (!t)
                goto failure;
	      *tok_tail = t;
	      tok_tail = &t->next;
	      invalid = 1;
	    }
	  s++;
	}
    }
  /*NOTREACHED*/

 failure:
  {
    /* Release the partial list without losing the errno value of the
       failed call.  */
    int save = errno;
    release_token_list (tok);
    errno = save;
  }
  return NULL;
}
Exemple #7
0
/****************
 * Parse a field into tokens as defined by rfc822.
 *
 * HDR is the first line of the field; the following lines are taken as
 * part of the same field for as long as hdr->next->cont is set.  Empty
 * continuation lines are stepped over.
 *
 * Returns the head of the token list (linked through ->next).  NULL is
 * returned if HDR is NULL, if the first line has no colon, if the field
 * name is empty, or if the field body yields no tokens.
 *
 * NOTE(review): the results of new_token() and append_to_token() are
 * linked into the list unchecked.  If those helpers can return NULL,
 * "&t->next" dereferences a null pointer - confirm their contract.
 */
static TOKEN
parse_field( HDR_LINE hdr )
{
    /* DELIMITERS ends an atom, DELIMITERS2 lists the characters which
     * are returned as single character tSPECIAL tokens.  The "t"
     * variants are used for the headers listed in tspecial_header. */
    static const char specials[] = "<>@.,;:\\[]\"()";
    static const char specials2[]= "<>@.,;:";
    static const char tspecials[] = "/?=<>@,;:\\[]\"()";
    static const char tspecials2[]= "/?=<>@.,;:";
    static struct {
        const char *name;
        int namelen;
    } tspecial_header[] = {
        { "Content-Type", 12 },
        { "Content-Transfer-Encoding", 25 },
        { NULL, 0 }
    };
    const char *delimiters;
    const char *delimiters2;
    const char *line, *s, *s2;
    size_t n;
    int i, invalid = 0; /* INVALID: a tSPACE was already emitted for the
                         * current run of invalid characters */
    TOKEN t, tok, *tok_head;

    if( !hdr )
        return NULL;

    /* TOK is the list head; TOK_HEAD always points to the link where
     * the next token has to be stored. */
    tok = NULL;
    tok_head = &tok;

    line = hdr->line;
    if( !(s = strchr( line, ':' )) )
        return NULL; /* oops */

    n = s - line; /* length of the field name */
    if( !n )
        return NULL; /* oops: invalid name */
    delimiters  = specials;
    delimiters2 = specials2;
    for(i=0; tspecial_header[i].name; i++ ) {
        if( n == tspecial_header[i].namelen
                && !memicmp( line, tspecial_header[i].name, n ) )
        {
            delimiters  = tspecials;
            delimiters2 = tspecials2;
            break;
        }
    }

    /* At this point we could store the fieldname in the parsing structure.
     * If we decide to do this, we should lowercase the name except for the
     * first character which should be uppercased.  This way we don't
     * need to apply the case insensitive compare in the future
     */

    s++; /* move over the colon */
    for(;;) {
        /* At the end of a line, go on with the continuation line.  This
         * has to be a loop: a continuation line may be empty, and with
         * *s == 0 the strchr() below would match the string terminator,
         * emit a bogus tSPECIAL token and advance S past the buffer. */
        while( !*s ) {
            if( !hdr->next || !hdr->next->cont )
                return tok; /* ready */
            hdr = hdr->next;
            s = hdr->line;
        }

        if( *s == '(' ) {
            /* A comment; it is skipped without emitting a token.  LEVEL
             * counts nested parentheses; parentheses inside a quoted
             * string within the comment are not counted. */
            int level = 1;
            int in_quote = 0;

            invalid = 0;
            for(s++ ; ; s++ ) {
                /* Same reasoning as above: never look at the terminator
                 * of an empty continuation line and step beyond it. */
                while( !*s ) {
                    if( !hdr->next || !hdr->next->cont )
                        goto comment_end;
                    hdr = hdr->next;
                    s = hdr->line;
                }

                if( in_quote ) {
                    if( *s == '\"' )
                        in_quote = 0;
                    else if( *s == '\\' && s[1] ) /* what about continuation?*/
                        s++; /* skip the escaped character */
                }
                else if( *s == ')' ) {
                    if( !--level )
                        break; /* the outermost comment is closed */
                }
                else if( *s == '(' )
                    level++;
                else if( *s == '\"' )
                    in_quote = 1;
            }
          comment_end:
            if( !*s )
                ;/* actually this is an error, but we don't care about it */
            else
                s++; /* move over the closing parenthesis */
        }
        else if( *s == '\"' || *s == '[' ) {
            /* A quoted string or a domain literal: one token is built
             * from the text up to the terminator; text on continuation
             * lines is appended to the same token. */
            /* We do not check for non-allowed nesting of domainliterals */
            int term = *s == '\"' ? '\"' : ']';
            invalid = 0;
            s++;
            t = NULL;

            for(;;) {
                /* Find the terminator or the end of this line.  A
                 * backslash makes the scan step over the next character
                 * so that an escaped terminator does not end the token. */
                for( s2 = s; *s2; s2++ ) {
                    if( *s2 == term )
                        break;
                    else if( *s2 == '\\' && s2[1] ) /* what about continuation?*/
                        s2++;
                }

                /* the first chunk creates the token, later ones extend it */
                t = t ? append_to_token( t, s, s2-s)
                    : new_token( term == '\"'? tQUOTED
                                 : tDOMAINLIT, s, s2-s);

                if( *s2 || !hdr->next || !hdr->next->cont )
                    break;
                hdr = hdr->next;
                s = hdr->line;
            }
            *tok_head = t;
            tok_head = &t->next;
            s = s2;
            if( *s )
                s++; /* skip the delimiter */
        }
        else if( (s2 = strchr( delimiters2, *s )) ) {
            /* special characters which are not handled above */
            invalid = 0;
            t = new_token( tSPECIAL, s, 1 );
            *tok_head = t;
            tok_head = &t->next;
            s++;
        }
        else if( *s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' ) {
            /* white space produces no token */
            invalid = 0;
            s = skip_ws(s+1);
        }
        else if( *s > 0x20 && !(*s & 128) ) { /* atom */
            /* an atom extends up to the first character which is not
             * above 0x20, has the high bit set or is in DELIMITERS */
            invalid = 0;
            for( s2 = s+1; *s2 > 0x20
                    && !(*s2 & 128 )
                    && !strchr( delimiters, *s2 ); s2++ )
                ;
            t = new_token( tATOM, s, s2-s );
            *tok_head = t;
            tok_head = &t->next;
            s = s2;
        }
        else {  /* invalid character */
            /* a run of invalid characters yields a single tSPACE token;
             * INVALID is reset by all other branches */
            if( !invalid ) { /* for parsing we assume only one space */
                t = new_token( tSPACE, NULL, 0);
                *tok_head = t;
                tok_head = &t->next;
                invalid = 1;
            }
            s++;
        }
    }
    /*NOTREACHED*/
}
Exemple #8
0
/**
* Removes longest common prefix indentation from every line in a triple
* quoted string. If the string begins with an empty line, that line is removed
* entirely.
*
* The token text in lexer->buffer is rewritten in place and lexer->buflen is
* updated to match. A '\0' is appended to the text first; the code relies on
* append_to_token() counting that byte in buflen (presumably it does - its
* definition is not visible here), so the terminator travels with the text
* while lines are compacted.
*
* The common indentation is the smallest number of leading spaces/tabs found
* on any line that contains something other than whitespace; the appended
* terminator counts as such content, so the final line takes part as well.
* Lines made of whitespace only never lower the indentation and always keep
* their line terminator.
*/
static void normalise_string(lexer_t* lexer)
{
  if(lexer->buflen == 0)
    return;

  // Make sure we have a null terminated string.
  append_to_token(lexer, '\0');

  // If we aren't multiline, do nothing.
  if(memchr(lexer->buffer, '\n', lexer->buflen) == NULL)
    return;

  // Calculate leading whitespace.
  size_t ws = lexer->buflen;
  size_t ws_this_line = 0;
  bool in_leading_ws = true;

  for(size_t i = 0; i < lexer->buflen; i++)
  {
    char c = lexer->buffer[i];

    if(in_leading_ws)
    {
      if(c == ' ' || c == '\t')
      {
        ws_this_line++;
      }
      else if((c != '\r') && (c != '\n'))
      {
        // First real content on this line: its indentation is a candidate
        // for the common prefix.
        if(ws_this_line < ws)
          ws = ws_this_line;

        in_leading_ws = false;
      }
    }

    if(c == '\n')
    {
      ws_this_line = 0;
      in_leading_ws = true;
    }
  }

  // Trim leading whitespace on each line.
  if(ws > 0)
  {
    char* line_start = lexer->buffer;
    char* compacted = lexer->buffer;
    size_t rem = lexer->buflen;

    while(rem > 0)
    {
      // Bounded search, so the scan can never leave the token text.
      char* line_end = (char*)memchr(line_start, '\n', rem);
      size_t line_len =
        (line_end == NULL) ? rem : (size_t)(line_end - line_start + 1);

      // Strip at most ws characters and only spaces/tabs. A whitespace-only
      // line shorter than the common indentation (including a bare "\r\n")
      // must keep its line terminator, otherwise blank lines would vanish
      // and the neighbouring lines would be merged.
      size_t trim = 0;

      while((trim < ws) && (trim < line_len) &&
        ((line_start[trim] == ' ') || (line_start[trim] == '\t')))
      {
        trim++;
      }

      memmove(compacted, line_start + trim, line_len - trim);
      compacted += line_len - trim;

      line_start += line_len;
      rem -= line_len;
    }

    // Keep the recorded length in step with the compacted text.
    lexer->buflen = (size_t)(compacted - lexer->buffer);
  }

  // Trim a leading newline if there is one.
  char* buf = lexer->buffer;

  if((buf[0] == '\r') && (buf[1] == '\n'))
  {
    lexer->buflen -= 2;
    memmove(&buf[0], &buf[2], lexer->buflen);
  }
  else if(buf[0] == '\n')
  {
    lexer->buflen--;
    memmove(&buf[0], &buf[1], lexer->buflen);
  }
}