示例#1
0
/* Collapse whitespace in one chunk of a byte stream.
 *
 * Every run of bytes for which dsk_ascii_isspace() is true is replaced by a
 * single ' ' in `out`, except that whitespace seen before the first
 * non-whitespace byte of the stream produces no output at all.  A pending
 * run is only written once a later non-whitespace byte arrives, so this
 * function never emits trailing whitespace by itself.
 *
 * State is carried across calls in trimmer->in_space and
 * trimmer->in_initial_space, so a whitespace run may span chunk boundaries.
 *
 *   filter     the DskWhitespaceTrimmer, passed as its base type
 *   out        buffer that receives the filtered bytes
 *   in_length  number of bytes available at in_data
 *   in_data    the input bytes
 *   error      unused: this function has no failure path
 *
 * Returns DSK_TRUE always.
 */
static dsk_boolean
dsk_whitespace_trimmer_process    (DskSyncFilter *filter,
                                   DskBuffer      *out,
                                   unsigned        in_length,
                                   const uint8_t  *in_data,
                                   DskError      **error)
{
  DskWhitespaceTrimmer *trimmer = (DskWhitespaceTrimmer *) filter;
  uint8_t in_space = trimmer->in_space;
  DSK_UNUSED (error);
  while (in_length > 0)
    {
      uint8_t c = *in_data++;
      if (dsk_ascii_isspace (c))
        {
          in_space = DSK_TRUE;
        }
      else
        {
          /* Flush the pending run as one separator, unless it is the
             leading whitespace of the stream. */
          if (in_space && !trimmer->in_initial_space)
            dsk_buffer_append_byte (out, ' ');

          /* The first non-whitespace byte ends the "initial" phase whether
             or not whitespace preceded it.  Previously the flag was only
             cleared when a whitespace run came first, so a stream that
             began with a non-whitespace byte (with in_space clear) kept
             the flag set and lost its first inter-word separator. */
          trimmer->in_initial_space = DSK_FALSE;
          in_space = DSK_FALSE;
          dsk_buffer_append_byte (out, c);
        }
      in_length--;
    }
  trimmer->in_space = in_space;
  return DSK_TRUE;
}
示例#2
0
/* Decode one chunk of hexadecimal text into raw bytes.
 *
 * Hex digits are consumed in pairs: the first digit of a pair is the high
 * nibble, the second the low nibble, and each completed pair appends one
 * byte to `out`.  Whitespace is skipped wherever it occurs, including
 * between the two digits of a pair.  An unpaired digit left at the end of
 * the chunk is remembered in hexdec->nibble / hexdec->has_nibble and
 * completed by the next call.
 *
 *   filter     the DskHexDecoder, passed as its base type
 *   out        buffer that receives the decoded bytes
 *   in_length  number of bytes available at in_data
 *   in_data    the hex text
 *   error      set when a byte is neither a hex digit nor whitespace
 *
 * Returns DSK_TRUE if the whole chunk was consumed; DSK_FALSE (with *error
 * set through dsk_set_error) at the first invalid byte.  Bytes decoded
 * before the invalid one have already been appended to `out`.
 */
static dsk_boolean
dsk_hex_decoder_process (DskOctetFilter *filter,
                            DskBuffer      *out,
                            unsigned        in_length,
                            const uint8_t  *in_data,
                            DskError      **error)
{
  DskHexDecoder *hexdec = (DskHexDecoder *) filter;
  unsigned remaining;
  /* NOTE(review): `error` is in fact used below; marker kept as in the
     original. */
  DSK_UNUSED (error);

  for (remaining = in_length; remaining != 0; remaining--, in_data++)
    {
      const uint8_t c = *in_data;

      if (!dsk_ascii_isxdigit (c))
        {
          /* Whitespace is ignored; anything else is fatal. */
          if (dsk_ascii_isspace (c))
            continue;
          dsk_set_error (error, "bad character %s in hex-data",
                         dsk_ascii_byte_name (c));
          return DSK_FALSE;
        }

      if (!hexdec->has_nibble)
        {
          /* First digit of a pair: stash it until its partner arrives. */
          hexdec->nibble = dsk_ascii_xdigit_value (c);
          hexdec->has_nibble = DSK_TRUE;
          continue;
        }

      /* Second digit of a pair: combine with the stashed high nibble. */
      dsk_buffer_append_byte (out,
                              (hexdec->nibble << 4)
                              | dsk_ascii_xdigit_value (c));
      hexdec->has_nibble = DSK_FALSE;
    }
  return DSK_TRUE;
}
示例#3
0
/* --- lexing --- */
/* Feed one chunk of JSON text to the parser's incremental lexer.
 *
 * The lexer is a state machine whose state lives in parser->lex_state, so
 * a token may be split across any number of calls.  Each completed token
 * is passed to handle_token(); string and number text is accumulated with
 * append_to_string_buffer() / append_char_to_string_buffer() first.
 *
 * Notes on behaviour visible in this function:
 *   - The barewords true/false/null are matched one byte at a time,
 *     accepting either the lower- or upper-case letter at each position.
 *   - A bareword or number token is only emitted when the byte FOLLOWING
 *     it is seen, so one that ends exactly at the end of the data fed so
 *     far is still pending when this function returns.
 *   - Number text is collected from the characters 0-9 . e E + - without
 *     validating its shape here.
 *   - \uXXXX escapes are converted to UTF-8.  A high surrogate is held in
 *     parser->utf16_surrogate until the low surrogate arrives; it must be
 *     followed immediately by a \uXXXX low surrogate, anything else is an
 *     error.
 *   - parser->line_no is incremented for each '\n' seen, including ones
 *     inside strings, and is reported in error messages.
 *
 *   parser   the parser whose lexer state is advanced
 *   n_bytes  number of bytes available at `bytes`
 *   bytes    the JSON text
 *   error    set (through dsk_set_error, or by handle_token) on failure
 *
 * Returns DSK_TRUE if the whole chunk was consumed, DSK_FALSE on the first
 * lexical error or when handle_token() fails.
 */
dsk_boolean
dsk_json_parser_feed     (DskJsonParser *parser,
                          size_t         n_bytes,
                          const uint8_t *bytes,
                          DskError     **error)
{
  while (n_bytes > 0)
    {
      switch (parser->lex_state)
        {
        case JSON_LEX_STATE_INIT:
          /* skip whitespace between tokens, counting lines */
          while (n_bytes > 0 && dsk_ascii_isspace (*bytes))
            {
              if (*bytes == '\n')
                parser->line_no++;
              bytes++;
              n_bytes--;
            }
          if (n_bytes == 0)
            break;
          switch (*bytes)
            {
            case 't': case 'T':
              parser->lex_state = JSON_LEX_STATE_TRUE;
              parser->fixed_n_chars = 1;
              bytes++;
              n_bytes--;
              break;
            case 'f': case 'F':
              parser->lex_state = JSON_LEX_STATE_FALSE;
              parser->fixed_n_chars = 1;
              bytes++;
              n_bytes--;
              break;
            case 'n': case 'N':
              parser->lex_state = JSON_LEX_STATE_NULL;
              parser->fixed_n_chars = 1;
              bytes++;
              n_bytes--;
              break;
            case '"':
              parser->lex_state = JSON_LEX_STATE_IN_DQ;
              parser->str_len = 0;
              bytes++;
              n_bytes--;
              break;
            case '-': case '+':
            case '0': case '1': case '2': case '3': case '4': 
            case '5': case '6': case '7': case '8': case '9': 
              parser->lex_state = JSON_LEX_STATE_IN_NUMBER;
              parser->str_len = 0;
              append_to_string_buffer (parser, 1, bytes);
              bytes++;
              n_bytes--;
              break;

            /* single-character punctuation tokens */
#define WRITE_CHAR_TOKEN_CASE(character, SHORTNAME) \
            case character: \
              if (!handle_token (parser, JSON_TOKEN_##SHORTNAME, error)) \
                return DSK_FALSE; \
              n_bytes--; \
              bytes++; \
              break
            WRITE_CHAR_TOKEN_CASE('{', LBRACE);
            WRITE_CHAR_TOKEN_CASE('}', RBRACE);
            WRITE_CHAR_TOKEN_CASE('[', LBRACKET);
            WRITE_CHAR_TOKEN_CASE(']', RBRACKET);
            WRITE_CHAR_TOKEN_CASE(',', COMMA);
            WRITE_CHAR_TOKEN_CASE(':', COLON);
#undef WRITE_CHAR_TOKEN_CASE

            case '\n':
              parser->line_no++;
              n_bytes--;
              bytes++;
              break;
            case '\t': case '\r': case ' ':
              n_bytes--;
              bytes++;
              break;
            default:
              dsk_set_error (error,
                             "unexpected character %s in json (line %u)",
                             dsk_ascii_byte_name (*bytes), parser->line_no);
              return DSK_FALSE;
            }
          break;

        /* Barewords: fixed_n_chars counts how many letters have matched.
           Once all `length` letters are in, the next byte decides: an
           alphanumeric is an error, anything else ends the token and is
           left unconsumed for the INIT state. */
#define WRITE_FIXED_BAREWORD_CASE(SHORTNAME, lc, UC, length) \
        case JSON_LEX_STATE_##SHORTNAME: \
          if (parser->fixed_n_chars == length) \
            { \
              /* are we at end of string? */ \
              if (dsk_ascii_isalnum (*bytes)) \
                { \
                  dsk_set_error (error,  \
                                 "got %s after '%s' (line %u)", \
                                 dsk_ascii_byte_name (*bytes), lc, \
                                 parser->line_no); \
                  return DSK_FALSE; \
                } \
              else \
                { \
                  parser->lex_state = JSON_LEX_STATE_INIT; \
                  if (!handle_token (parser, JSON_TOKEN_##SHORTNAME, \
                                     error)) \
                    return DSK_FALSE; \
                } \
            } \
          else if (*bytes == lc[parser->fixed_n_chars] \
                || *bytes == UC[parser->fixed_n_chars]) \
            { \
              parser->fixed_n_chars += 1; \
              n_bytes--; \
              bytes++; \
            } \
          else \
            { \
              dsk_set_error (error, \
                           "unexpected character %s (parsing %s) (line %u)", \
                           dsk_ascii_byte_name (*bytes), UC, parser->line_no); \
              return DSK_FALSE; \
            } \
          break;
        WRITE_FIXED_BAREWORD_CASE(TRUE, "true", "TRUE", 4);
        WRITE_FIXED_BAREWORD_CASE(FALSE, "false", "FALSE", 5);
        WRITE_FIXED_BAREWORD_CASE(NULL, "null", "NULL", 4);
#undef WRITE_FIXED_BAREWORD_CASE

        case JSON_LEX_STATE_IN_DQ:
          /* A pending high surrogate may only be followed by another
             escape (checked further in the IN_DQ_BS state).  Without this
             check it would be silently dropped from the string and could
             even pair with a low surrogate in a later string. */
          if (parser->utf16_surrogate != 0 && *bytes != '\\')
            {
              dsk_set_error (error,
                             "high (first) half of surrogate pair \\u%04x was not followed by low half, line %u",
                             (unsigned) parser->utf16_surrogate,
                             parser->line_no);
              return DSK_FALSE;
            }
          if (*bytes == '"')
            {
              if (!handle_token (parser, JSON_TOKEN_STRING, error))
                return DSK_FALSE;
              bytes++;
              n_bytes--;
              parser->lex_state = JSON_LEX_STATE_INIT;
            }
          else if (*bytes == '\\')
            {
              n_bytes--;
              bytes++;
              parser->bs_sequence_len = 0;
              parser->lex_state = JSON_LEX_STATE_IN_DQ_BS;
            }
          else
            {
              /* Append the longest run of ordinary bytes in one go.  The
                 index is a size_t, like n_bytes, so it cannot wrap on
                 very large chunks. */
              size_t i;
              if (*bytes == '\n')
                parser->line_no++;
              for (i = 1; i < n_bytes; i++)
                if (bytes[i] == '"' || bytes[i] == '\\')
                  break;
                else if (bytes[i] == '\n')
                  parser->line_no++;
              append_to_string_buffer (parser, i, bytes);
              n_bytes -= i;
              bytes += i;
            }
          break;
        case JSON_LEX_STATE_IN_DQ_BS:
          if (parser->bs_sequence_len == 0)
            {
              /* first byte after the backslash: with a high surrogate
                 pending, only a \u escape is acceptable */
              if (parser->utf16_surrogate != 0 && *bytes != 'u')
                {
                  dsk_set_error (error,
                                 "high (first) half of surrogate pair \\u%04x was not followed by low half, line %u",
                                 (unsigned) parser->utf16_surrogate,
                                 parser->line_no);
                  return DSK_FALSE;
                }
              switch (*bytes)
                {
#define WRITE_BS_CHAR_CASE(bschar, cchar) \
                case bschar: \
                  append_char_to_string_buffer (parser, cchar); \
                  bytes++; \
                  n_bytes--; \
                  parser->lex_state = JSON_LEX_STATE_IN_DQ; \
                  break
                WRITE_BS_CHAR_CASE('b', '\b');
                WRITE_BS_CHAR_CASE('f', '\f');
                WRITE_BS_CHAR_CASE('n', '\n');
                WRITE_BS_CHAR_CASE('r', '\r');
                WRITE_BS_CHAR_CASE('t', '\t');
                WRITE_BS_CHAR_CASE('/', '/');
                WRITE_BS_CHAR_CASE('"', '"');
                WRITE_BS_CHAR_CASE('\\', '\\');
#undef WRITE_BS_CHAR_CASE
                case 'u':
                  /* bs_sequence[0] holds the 'u'; the four hex digits
                     follow in [1..4] */
                  parser->bs_sequence[parser->bs_sequence_len++] = *bytes++;
                  n_bytes--;
                  break;
                default:
                  dsk_set_error (error,
                               "invalid character %s after '\\' (line %u)",
                               dsk_ascii_byte_name (*bytes), parser->line_no);
                  return DSK_FALSE;
                }
            }
          else
            {
              /* must be \uxxxx (the only multi-character \ sequence) */
              if (!dsk_ascii_isxdigit (*bytes))
                {
                  dsk_set_error (error,
                               "expected 4 hex digits after \\u, got %s (line %u)",
                               dsk_ascii_byte_name (*bytes), parser->line_no);
                  return DSK_FALSE;
                }
              parser->bs_sequence[parser->bs_sequence_len++] = *bytes++;
              n_bytes--;
              if (parser->bs_sequence_len == 5)
                {
                  /* 'u' plus all four digits collected: decode */
                  char utf8buf[8];
                  unsigned value;
                  parser->bs_sequence[5] = 0;
                  value = (unsigned) strtoul (parser->bs_sequence + 1, NULL, 16);
                  if (DSK_UTF16_LO_SURROGATE_START <= value
                   && value <= DSK_UTF16_LO_SURROGATE_END)
                    {
                      if (parser->utf16_surrogate == 0)
                        {
                          dsk_set_error (error,
                                       "low (second) half of surrogate pair was encountered without high-half, line %u",
                                       parser->line_no);
                          return DSK_FALSE;
                        }
                      uint32_t code = dsk_utf16_surrogate_pair_to_codepoint (parser->utf16_surrogate, value);
                      append_to_string_buffer (parser,
                                               dsk_utf8_encode_unichar (utf8buf, code),
                                               (const uint8_t *) utf8buf);
                      parser->utf16_surrogate = 0;
                    }
                  else if (DSK_UTF16_HI_SURROGATE_START <= value
                        && value <= DSK_UTF16_HI_SURROGATE_END)
                    {
                      if (parser->utf16_surrogate != 0)
                        {
                          /* code units are shown in hex to match the
                             \u notation used in the message */
                          dsk_set_error (error,
                                       "got two first-half surrogate pairs (UTF16 surrogate \\u%04x was followed by \\u%04x), line %u",
                                       (unsigned) parser->utf16_surrogate,
                                       value, parser->line_no);
                          return DSK_FALSE;
                        }
                      /* remember the high half until the low half arrives */
                      parser->utf16_surrogate = value;
                    }
                  else
                    {
                      if (parser->utf16_surrogate != 0)
                        {
                          dsk_set_error (error,
                                       "high (first) half of surrogate pair \\u%04x was followed by non-surrogate \\u%04x, line %u",
                                       (unsigned) parser->utf16_surrogate,
                                       value, parser->line_no);
                          return DSK_FALSE;
                        }
                      append_to_string_buffer (parser,
                                               dsk_utf8_encode_unichar (utf8buf, value),
                                               (const uint8_t *) utf8buf);
                      parser->utf16_surrogate = 0;
                    }
                  parser->lex_state = JSON_LEX_STATE_IN_DQ;
                }
              /* otherwise fewer than four digits so far: stay in this
                 state and wait for more input */
            }
          break;
        case JSON_LEX_STATE_IN_NUMBER:
          if (dsk_ascii_isdigit (*bytes)
           || *bytes == '.'
           || *bytes == 'e'
           || *bytes == 'E'
           || *bytes == '+'
           || *bytes == '-')
            {
              append_to_string_buffer (parser, 1, bytes);
              bytes++;
              n_bytes--;
            }
          else
            {
              /* append the number token */
              if (!handle_token (parser, JSON_TOKEN_NUMBER, error))
                return DSK_FALSE;

              /* go back to init state (do not consume character) */
              parser->lex_state = JSON_LEX_STATE_INIT;
            }
          break;
        default:
          dsk_error ("unhandled lex state %u", parser->lex_state);
        }
    }
  return DSK_TRUE;
}