Exemplo n.º 1
0
/**
 * raptor_string_escaped_write:
 * @string: UTF-8 string to write
 * @len: length of UTF-8 string
 * @delim: Terminating delimiter character for string (such as " or >) or \0 for no escaping.
 * @flags: bit flags - see #raptor_escaped_write_bitflags
 * @iostr: #raptor_iostream to write to
 *
 * Write a UTF-8 string formatted using different escapes to a #raptor_iostream
 *
 * Supports writing escapes in the Python, N-Triples, Turtle, JSON, mKR,
 * SPARQL styles to an iostream.
 * 
 * Return value: non-0 on failure such as bad UTF-8 encoding.
 **/
int
raptor_string_escaped_write(const unsigned char *string,
                            size_t len,
                            const char delim,
                            unsigned int flags,
                            raptor_iostream *iostr)
{
  unsigned char c;
  int unichar_len;
  raptor_unichar unichar;

  if(!string)
    return 1;
  
  for(; (c=*string); string++, len--) {
    if((delim && c == delim && (delim == '\'' || delim == '"')) ||
       c == '\\') {
      raptor_iostream_write_byte('\\', iostr);
      raptor_iostream_write_byte(c, iostr);
      continue;
    }

    if(delim && c == delim) {
      raptor_iostream_counted_string_write("\\u", 2, iostr);
      raptor_iostream_hexadecimal_write(c, 4, iostr);
      continue;
    }
    
    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES) {
      /* Must escape #x00-#x20<>\"{}|^` */
      if(c <= 0x20 ||
         c == '<' || c == '>' || c == '\\' || c == '"' || 
         c == '{' || c == '}' || c == '|' || c == '^' || c == '`') {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      } else if(c < 0x7f) {
        raptor_iostream_write_byte(c, iostr);
        continue;
      }
    }

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU) {
      if(c == 0x09) {
        raptor_iostream_counted_string_write("\\t", 2, iostr);
        continue;
      } else if(c == 0x0a) {
        raptor_iostream_counted_string_write("\\n", 2, iostr);
        continue;
      } else if(c == 0x0d) {
        raptor_iostream_counted_string_write("\\r", 2, iostr);
        continue;
      } else if(c < 0x20 || c == 0x7f) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      }
    }
    
    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF) {
      if(c == 0x08) {
        /* JSON has \b for backspace */
        raptor_iostream_counted_string_write("\\b", 2, iostr);
        continue;
      } else if(c == 0x0b) {
        /* JSON has \f for formfeed */
        raptor_iostream_counted_string_write("\\f", 2, iostr);
        continue;
      }
    }

    /* Just format remaining characters */
    if(c < 0x7f) {
      raptor_iostream_write_byte(c, iostr);
      continue;
    } 
    
    /* It is unicode */    
    unichar_len = raptor_unicode_utf8_string_get_char(string, len, &unichar);
    if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > len)
      /* UTF-8 encoding had an error or ended in the middle of a string */
      return 1;

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8) {
      /* UTF-8 is allowed so no need to escape */
      raptor_iostream_counted_string_write(string, unichar_len, iostr);
    } else {
      if(unichar < 0x10000) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), 4, iostr);
      } else {
        raptor_iostream_counted_string_write("\\U", 2, iostr);
        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), 8, iostr);
      }
    }
Exemplo n.º 2
0
/**
 * raptor_string_python_write:
 * @string: UTF-8 string to write
 * @len: length of UTF-8 string
 * @delim: Terminating delimiter character for string (such as " or >)
 * or \0 for no escaping.
 * @flags: flags 0=N-Triples mode, 1=Turtle (allow raw UTF-8), 2=Turtle long string (allow raw UTF-8), 3=JSON
 * @iostr: #raptor_iostream to write to
 *
 * Write a UTF-8 string using Python-style escapes (N-Triples, Turtle, JSON) to an iostream.
 * 
 * Return value: non-0 on failure such as bad UTF-8 encoding.
 **/
int
raptor_string_python_write(const unsigned char *string,
                           size_t len,
                           const char delim,
                           int flags,
                           raptor_iostream *iostr)
{
  unsigned char c;
  int unichar_len;
  raptor_unichar unichar;

  if(flags < 0 || flags > 3)
    return 1;
  
  for(; (c=*string); string++, len--) {
    if((delim && c == delim && (delim == '\'' || delim == '"')) ||
       c == '\\') {
      raptor_iostream_write_byte('\\', iostr);
      raptor_iostream_write_byte(c, iostr);
      continue;
    }
    if(delim && c == delim) {
      raptor_iostream_counted_string_write("\\u", 2, iostr);
      raptor_iostream_hexadecimal_write(c, 4, iostr);
      continue;
    }
    
    if(flags != 2) {
      /* N-Triples, Turtle or JSON */

      /* Note: NTriples is ASCII */
      if(c == 0x09) {
        raptor_iostream_counted_string_write("\\t", 2, iostr);
        continue;
      } else if((flags == 3) && c == 0x08) {
        /* JSON has \b for backspace */
        raptor_iostream_counted_string_write("\\b", 2, iostr);
        continue;
      } else if(c == 0x0a) {
        raptor_iostream_counted_string_write("\\n", 2, iostr);
        continue;
      } else if((flags == 3) && c == 0x0b) {
        /* JSON has \f for formfeed */
        raptor_iostream_counted_string_write("\\f", 2, iostr);
        continue;
      } else if(c == 0x0d) {
        raptor_iostream_counted_string_write("\\r", 2, iostr);
        continue;
      } else if(c < 0x20|| c == 0x7f) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      } else if(c < 0x80) {
        raptor_iostream_write_byte(c, iostr);
        continue;
      }
    } else if(c < 0x80) {
      /* Turtle long string has no escapes except delim */
      raptor_iostream_write_byte(c, iostr);
      continue;
    } 
    
    /* It is unicode */
    
    unichar_len = raptor_unicode_utf8_string_get_char(string, len, NULL);
    if(unichar_len < 0 || unichar_len > (int)len)
      /* UTF-8 encoding had an error or ended in the middle of a string */
      return 1;

    if(flags >= 1 && flags <= 3) {
      /* Turtle and JSON are UTF-8 - no need to escape */
      raptor_iostream_counted_string_write(string, unichar_len, iostr);
    } else {
      unichar_len = raptor_unicode_utf8_string_get_char(string, len, &unichar);

      if(unichar < 0x10000) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(unichar, 4, iostr);
      } else {
        raptor_iostream_counted_string_write("\\U", 2, iostr);
        raptor_iostream_hexadecimal_write(unichar, 8, iostr);
      }
    }
    
    unichar_len--; /* since loop does len-- */
    string += unichar_len; len -= unichar_len;

  }

  return 0;
}