/**
 * raptor_string_escaped_write:
 * @string: UTF-8 string to write
 * @len: length of UTF-8 string
 * @delim: Terminating delimiter character for string (such as " or >) or \0 for no escaping.
 * @flags: bit flags - see #raptor_escaped_write_bitflags
 * @iostr: #raptor_iostream to write to
 *
 * Write a UTF-8 string formatted using different escapes to a #raptor_iostream
 *
 * Supports writing escapes in the Python, N-Triples, Turtle, JSON, mKR,
 * SPARQL styles to an iostream.
 *
 * Writing stops at the first NUL byte of @string; @len is used to bound
 * the decoding of multi-byte UTF-8 sequences.  A NULL @string is a failure.
 *
 * Return value: non-0 on failure such as bad UTF-8 encoding.
 **/
int
raptor_string_escaped_write(const unsigned char *string,
                            size_t len,
                            const char delim,
                            unsigned int flags,
                            raptor_iostream *iostr)
{
  unsigned char c;
  int unichar_len;
  raptor_unichar unichar;

  if(!string)
    return 1;

  for(; (c=*string); string++, len--) {
    /* Quote delimiters and backslash get a plain backslash escape */
    if((delim && c == delim && (delim == '\'' || delim == '"')) ||
       c == '\\') {
      raptor_iostream_write_byte('\\', iostr);
      raptor_iostream_write_byte(c, iostr);
      continue;
    }

    /* Any other delimiter is written as a 4-digit hexadecimal escape */
    if(delim && c == delim) {
      raptor_iostream_counted_string_write("\\u", 2, iostr);
      raptor_iostream_hexadecimal_write(c, 4, iostr);
      continue;
    }

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES) {
      /* Must escape #x00-#x20<>\"{}|^` */
      if(c <= 0x20 || c == '<' || c == '>' || c == '\\' || c == '"' ||
         c == '{' || c == '}' || c == '|' || c == '^' || c == '`') {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      } else if(c < 0x7f) {
        raptor_iostream_write_byte(c, iostr);
        continue;
      }
    }

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU) {
      if(c == 0x09) {
        raptor_iostream_counted_string_write("\\t", 2, iostr);
        continue;
      } else if(c == 0x0a) {
        raptor_iostream_counted_string_write("\\n", 2, iostr);
        continue;
      } else if(c == 0x0d) {
        raptor_iostream_counted_string_write("\\r", 2, iostr);
        continue;
      } else if(c < 0x20 || c == 0x7f) {
        /* Remaining control characters and DEL */
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      }
    }

    /* When the TNRU flag is also set, every character below 0x20 has
     * already been consumed above, so these short forms are only
     * emitted when BF is set without TNRU.
     */
    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF) {
      if(c == 0x08) {
        /* JSON has \b for backspace */
        raptor_iostream_counted_string_write("\\b", 2, iostr);
        continue;
      } else if(c == 0x0c) {
        /* JSON has \f for formfeed, which is U+000C (0x0b is vertical tab) */
        raptor_iostream_counted_string_write("\\f", 2, iostr);
        continue;
      }
    }

    /* Just format remaining characters */
    if(c < 0x7f) {
      raptor_iostream_write_byte(c, iostr);
      continue;
    }

    /* It is unicode */
    unichar_len = raptor_unicode_utf8_string_get_char(string, len, &unichar);
    if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > len)
      /* UTF-8 encoding had an error or ended in the middle of a string */
      return 1;

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8) {
      /* UTF-8 is allowed so no need to escape */
      raptor_iostream_counted_string_write(string, unichar_len, iostr);
    } else {
      if(unichar < 0x10000) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar),
                                          4, iostr);
      } else {
        raptor_iostream_counted_string_write("\\U", 2, iostr);
        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar),
                                          8, iostr);
      }
    }

    /* Step over the rest of the multi-byte sequence; the loop header
     * advances past its first byte and does the matching len--.
     */
    unichar_len--;
    string += unichar_len;
    len -= unichar_len;
  }

  return 0;
}

/**
 * raptor_string_python_write:
 * @string: UTF-8 string to write
 * @len: length of UTF-8 string
 * @delim: Terminating delimiter character for string (such as " or >)
 * or \0 for no escaping.
 * @flags: flags 0=N-Triples mode, 1=Turtle (allow raw UTF-8), 2=Turtle long string (allow raw UTF-8), 3=JSON
 * @iostr: #raptor_iostream to write to
 *
 * Write a UTF-8 string using Python-style escapes (N-Triples, Turtle, JSON) to an iostream.
 *
 * Writing stops at the first NUL byte of @string; @len is used to bound
 * the decoding of multi-byte UTF-8 sequences.  A NULL @string or a
 * @flags value outside 0-3 is a failure.
 *
 * Return value: non-0 on failure such as bad UTF-8 encoding.
 **/
int
raptor_string_python_write(const unsigned char *string,
                           size_t len,
                           const char delim,
                           int flags,
                           raptor_iostream *iostr)
{
  unsigned char c;
  int unichar_len;
  raptor_unichar unichar;

  if(!string)
    return 1;

  if(flags < 0 || flags > 3)
    return 1;

  for(; (c=*string); string++, len--) {
    /* Quote delimiters and backslash get a plain backslash escape */
    if((delim && c == delim && (delim == '\'' || delim == '"')) ||
       c == '\\') {
      raptor_iostream_write_byte('\\', iostr);
      raptor_iostream_write_byte(c, iostr);
      continue;
    }

    /* Any other delimiter is written as a 4-digit hexadecimal escape */
    if(delim && c == delim) {
      raptor_iostream_counted_string_write("\\u", 2, iostr);
      raptor_iostream_hexadecimal_write(c, 4, iostr);
      continue;
    }

    if(flags != 2) {
      /* N-Triples, Turtle or JSON */

      /* Note: NTriples is ASCII */
      if(c == 0x09) {
        raptor_iostream_counted_string_write("\\t", 2, iostr);
        continue;
      } else if((flags == 3) && c == 0x08) {
        /* JSON has \b for backspace */
        raptor_iostream_counted_string_write("\\b", 2, iostr);
        continue;
      } else if(c == 0x0a) {
        raptor_iostream_counted_string_write("\\n", 2, iostr);
        continue;
      } else if((flags == 3) && c == 0x0c) {
        /* JSON has \f for formfeed, which is U+000C; vertical tab (0x0b)
         * falls through to the generic control character escape below.
         */
        raptor_iostream_counted_string_write("\\f", 2, iostr);
        continue;
      } else if(c == 0x0d) {
        raptor_iostream_counted_string_write("\\r", 2, iostr);
        continue;
      } else if(c < 0x20 || c == 0x7f) {
        /* Remaining control characters and DEL */
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      } else if(c < 0x80) {
        raptor_iostream_write_byte(c, iostr);
        continue;
      }
    } else if(c < 0x80) {
      /* Turtle long string has no escapes except delim */
      raptor_iostream_write_byte(c, iostr);
      continue;
    }

    /* It is unicode */
    unichar_len = raptor_unicode_utf8_string_get_char(string, len, NULL);
    /* Compare as size_t: narrowing len to int could turn a large length
     * into a negative number and reject valid input.
     */
    if(unichar_len < 0 || (size_t)unichar_len > len)
      /* UTF-8 encoding had an error or ended in the middle of a string */
      return 1;

    if(flags != 0) {
      /* Turtle and JSON are UTF-8 - no need to escape */
      raptor_iostream_counted_string_write(string, unichar_len, iostr);
    } else {
      /* N-Triples needs the decoded code point; a failed decode must not
       * be used for output or for advancing the string pointer.
       */
      unichar_len = raptor_unicode_utf8_string_get_char(string, len, &unichar);
      if(unichar_len < 0)
        return 1;

      if(unichar < 0x10000) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(unichar, 4, iostr);
      } else {
        raptor_iostream_counted_string_write("\\U", 2, iostr);
        raptor_iostream_hexadecimal_write(unichar, 8, iostr);
      }
    }

    unichar_len--; /* since loop does len-- */
    string += unichar_len;
    len -= unichar_len;
  }

  return 0;
}