コード例 #1
0
ファイル: snprintf.c プロジェクト: bbcarchdev/deb-raptor2
/**
 * raptor_format_integer:
 * @buffer: buffer (or NULL)
 * @bufsize: size of above (or 0)
 * @integer: integer value to format
 * @base: numeric base up to 36
 * @width: field width (or -1)
 * @padding: padding char (or \0)
 *
 * INTERNAL - Format an integer as a decimal into a buffer or
 * calculate the size needed.
 *
 * Works Like the C99 snprintf() but just for integers.
 *
 * If @buffer is NULL or the @bufsize is too small, the number of
 * bytes needed (excluding NUL) is returned and no formatting is done.
 *
 * Return value: number of bytes needed or written (excluding NUL) or 0 on failure
 */
size_t
raptor_format_integer(char* buffer, size_t bufsize, int integer,
                      unsigned int base, int width, char padding)
{
  size_t len = 1;
  char *p;
  unsigned int value;

  if(integer < 0) {
    value = (unsigned int)-integer;
    len++;
    width++;
  } else
    value = (unsigned int)integer;
  while(value /= base)
    len++;

  if(width > 0 && RAPTOR_GOOD_CAST(size_t, width) > len)
    len = width;

  if(!buffer || bufsize < RAPTOR_GOOD_CAST(size_t, (len + 1))) /* +1 for NUL */
    return len;

  if(!padding)
    padding = ' ';

  if(integer < 0)
    value = (unsigned int)-integer;
  else
    value = (unsigned int)integer;

  p = &buffer[len];
  *p-- = '\0';
  while(value  > 0 && p >= buffer) {
    *p-- = digits[value % base];
    value /= base;
  }
  while(p >= buffer)
    *p-- = padding;
  if(integer < 0)
    *buffer = '-';

  return len;
}
コード例 #2
0
/**
 * raptor_string_escaped_write:
 * @string: UTF-8 string to write
 * @len: length of UTF-8 string
 * @delim: Terminating delimiter character for string (such as " or >) or \0 for no escaping.
 * @flags: bit flags - see #raptor_escaped_write_bitflags
 * @iostr: #raptor_iostream to write to
 *
 * Write a UTF-8 string formatted using different escapes to a #raptor_iostream
 *
 * Supports writing escapes in the Python, N-Triples, Turtle, JSON, mKR,
 * SPARQL styles to an iostream.
 * 
 * Return value: non-0 on failure such as bad UTF-8 encoding.
 **/
int
raptor_string_escaped_write(const unsigned char *string,
                            size_t len,
                            const char delim,
                            unsigned int flags,
                            raptor_iostream *iostr)
{
  unsigned char c;
  int unichar_len;
  raptor_unichar unichar;

  if(!string)
    return 1;
  
  for(; (c=*string); string++, len--) {
    if((delim && c == delim && (delim == '\'' || delim == '"')) ||
       c == '\\') {
      raptor_iostream_write_byte('\\', iostr);
      raptor_iostream_write_byte(c, iostr);
      continue;
    }

    if(delim && c == delim) {
      raptor_iostream_counted_string_write("\\u", 2, iostr);
      raptor_iostream_hexadecimal_write(c, 4, iostr);
      continue;
    }
    
    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES) {
      /* Must escape #x00-#x20<>\"{}|^` */
      if(c <= 0x20 ||
         c == '<' || c == '>' || c == '\\' || c == '"' || 
         c == '{' || c == '}' || c == '|' || c == '^' || c == '`') {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      } else if(c < 0x7f) {
        raptor_iostream_write_byte(c, iostr);
        continue;
      }
    }

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU) {
      if(c == 0x09) {
        raptor_iostream_counted_string_write("\\t", 2, iostr);
        continue;
      } else if(c == 0x0a) {
        raptor_iostream_counted_string_write("\\n", 2, iostr);
        continue;
      } else if(c == 0x0d) {
        raptor_iostream_counted_string_write("\\r", 2, iostr);
        continue;
      } else if(c < 0x20 || c == 0x7f) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(c, 4, iostr);
        continue;
      }
    }
    
    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF) {
      if(c == 0x08) {
        /* JSON has \b for backspace */
        raptor_iostream_counted_string_write("\\b", 2, iostr);
        continue;
      } else if(c == 0x0b) {
        /* JSON has \f for formfeed */
        raptor_iostream_counted_string_write("\\f", 2, iostr);
        continue;
      }
    }

    /* Just format remaining characters */
    if(c < 0x7f) {
      raptor_iostream_write_byte(c, iostr);
      continue;
    } 
    
    /* It is unicode */    
    unichar_len = raptor_unicode_utf8_string_get_char(string, len, &unichar);
    if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > len)
      /* UTF-8 encoding had an error or ended in the middle of a string */
      return 1;

    if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8) {
      /* UTF-8 is allowed so no need to escape */
      raptor_iostream_counted_string_write(string, unichar_len, iostr);
    } else {
      if(unichar < 0x10000) {
        raptor_iostream_counted_string_write("\\u", 2, iostr);
        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), 4, iostr);
      } else {
        raptor_iostream_counted_string_write("\\U", 2, iostr);
        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), 8, iostr);
      }
    }
コード例 #3
0
ファイル: raptor_ntriples.c プロジェクト: Distrotech/raptor
/*
 * raptor_ntriples_parse_term_internal:
 * @world: raptor world
 * @locator: locator object (in/out) (or NULL)
 * @start: pointer to starting character of string (in)
 * @dest: destination of string (in)
 * @lenp: pointer to length of string (in/out)
 * @dest_lenp: pointer to length of destination string (out)
 * @end_char: string ending character
 * @class: string class
 *
 * INTERNAL - Parse an N-Triples term with escapes.
 *
 * Relies that @dest is long enough; it need only be as large as the
 * input string @start since when UTF-8 encoding, the escapes are
 * removed and the result is always less than or equal to length of
 * input.
 *
 * N-Triples strings / URIs are written in ASCII at present;
 * characters outside the printable ASCII range are discarded with a
 * warning.  See the grammar for full details of the allowed ranges.
 *
 * UTF-8 and the \u and \U esapes are both allowed.
 *
 * Return value: Non 0 on failure
 **/
static int
raptor_ntriples_parse_term_internal(raptor_world* world,
                                    raptor_locator* locator,
                                    const unsigned char **start,
                                    unsigned char *dest,
                                    size_t *lenp, size_t *dest_lenp,
                                    char end_char,
                                    raptor_ntriples_term_class term_class)
{
  const unsigned char *p = *start;
  unsigned char c = '\0';
  size_t ulen = 0;
  unsigned long unichar = 0;
  unsigned int position = 0;
  int end_char_seen = 0;

  /* find end of string, fixing backslashed characters on the way */
  while(*lenp > 0) {
    int unichar_width;

    c = *p;

    p++;
    (*lenp)--;
    if(locator) {
      locator->column++;
      locator->byte++;
    }

    if(c > 0x7f) {
      /* just copy the UTF-8 bytes through */
      int unichar_len;
      unichar_len = raptor_unicode_utf8_string_get_char(p - 1, 1 + *lenp, NULL);
      if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > *lenp) {
        raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "UTF-8 encoding error at character %d (0x%02X) found.", c, c);
        /* UTF-8 encoding had an error or ended in the middle of a string */
        return 1;
      }
      memmove(dest, p-1, unichar_len);
      dest += unichar_len;

      unichar_len--; /* p, *lenp were moved on by 1 earlier */

      p += unichar_len;
      (*lenp) -= unichar_len;
      if(locator) {
        locator->column += unichar_len;
        locator->byte += unichar_len;
      }
      continue;
    }

    if(c != '\\') {
      /* finish at non-backslashed end_char */
      if(end_char && c == end_char) {
        end_char_seen = 1;
        break;
      }

      if(!raptor_ntriples_term_valid(c, position, term_class)) {
        if(end_char) {
          /* end char was expected, so finding an invalid thing is an error */
          raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Missing terminating '%c' (found '%c')", end_char, c);
          return 0;
        } else {
          /* it's the end - so rewind 1 to save next char */
          p--;
          (*lenp)++;
          if(locator) {
            locator->column--;
            locator->byte--;
          }
          if(term_class == RAPTOR_TERM_CLASS_BNODEID && dest[-1] == '.') {
            /* If bnode id ended on '.' move back one */
            dest--;

            p--;
            (*lenp)++;
            if(locator) {
              locator->column--;
              locator->byte--;
            }
          }
          break;
        }
      }

      /* otherwise store and move on */
      *dest++ = c;
      position++;
      continue;
    }

    if(!*lenp) {
      raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "\\ at end of input.");
      return 0;
    }

    c = *p;

    p++;
    (*lenp)--;
    if(locator) {
      locator->column++;
      locator->byte++;
    }

    switch(c) {
      case '"':
      case '\\':
        *dest++ = c;
        break;
      case 'b':
        *dest++ = '\b';
        break;
      case 'f':
        *dest++ = '\f';
        break;
      case 'n':
        *dest++ = '\n';
        break;
      case 'r':
        *dest++ = '\r';
        break;
      case 't':
        *dest++ = '\t';
        break;
      case '<':
      case '>':
      case '{':
      case '}':
      case '|':
      case '^':
      case '`':
        /* Turtle 2013 allows these in URIs (as well as \" and \\) */
        *dest++ = c;
        break;

      case 'u':
      case 'U':
        ulen = (c == 'u') ? 4 : 8;

        if(*lenp < ulen) {
          raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "%c over end of input.", c);
          return 0;
        }

        if(1) {
          unsigned int ii;
          int n = 0;

          for(ii = 0; ii < ulen; ii++) {
            char cc = p[ii];
            if(!isxdigit(RAPTOR_GOOD_CAST(char, cc))) {
              raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "N-Triples string error - illegal hex digit %c in Unicode escape '%c%s...'",
                            cc, c, p);
              n = 1;
              break;
            }
          }

          if(n)
            break;

          n = sscanf((const char*)p, ((ulen == 4) ? "%04lx" : "%08lx"), &unichar);
          if(n != 1) {
            raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Uncode escape '%c%s...'", c, p);
            break;
          }
        }

        p += ulen;
        (*lenp) -= ulen;
        if(locator) {
          locator->column += RAPTOR_GOOD_CAST(int, ulen);
          locator->byte += RAPTOR_GOOD_CAST(int, ulen);
        }

        if(unichar > raptor_unicode_max_codepoint) {
          raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Unicode character with code point #x%lX (max #x%lX).", unichar, raptor_unicode_max_codepoint);
          break;
        }

        unichar_width = raptor_unicode_utf8_string_put_char(unichar, dest, 4);
        if(unichar_width < 0) {
          raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Unicode character with code point #x%lX.", unichar);
          break;
        }

        /* The destination length is set here to 4 since we know that in
         * all cases, the UTF-8 encoded output sequence is always shorter
         * than the input sequence, and the buffer is edited in place.
         *   \uXXXX: 6 bytes input - UTF-8 max 3 bytes output
         *   \uXXXXXXXX: 10 bytes input - UTF-8 max 4 bytes output
         */
        dest += (int)unichar_width;
        break;

      default:
        raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal string escape \\%c in \"%s\"", c, (char*)start);
        return 0;
    }

    position++;
  } /* end while */
コード例 #4
0
ファイル: raptor_xml.c プロジェクト: Distrotech/raptor
/**
 * raptor_xml_escape_string_any:
 * @world: raptor world
 * @string: string to XML escape (UTF-8)
 * @len: length of string
 * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length
 * @length: buffer size
 * @quote: optional quote character to escape for attribute content, or 0
 * @xml_version: XML 1.0 (10) or XML 1.1 (11)
 *
 * Return an XML-escaped version a string.
 * 
 * Follows
 * <ulink url="http://www.w3.org/TR/xml-c14n#ProcessingModel">Canonical XML rules on Text Nodes and Attribute Nodes</ulink>
 *
 * Both:
 *   Replaces <literal>&amp;</literal> and <literal>&lt;</literal>
 *   with <literal>&amp;amp;</literal> and <literal>&amp;lt;</literal>
 * respectively, preserving other characters.
 * 
 * Text Nodes:
 *   <literal>&gt;</literal> is turned into <literal>&amp;gt;</literal>
 *   ##xD is turned into <literal>&amp;##xD;</literal>
 *
 * Attribute Nodes:
 *   <literal>&gt;</literal> is generated not <literal>&amp;gt</literal>.
 *   ##x9, ##xA and ##xD are turned into
 *   <literal>&amp;##x9;</literal>,
 *   <literal>&amp;##xA;</literal> and
 *   <literal>&amp;##xD;</literal>
 *   entities.
 *
 * If @quote is given it can be either of '\'' or '\"'
 * which will be turned into <literal>&amp;apos;</literal> or
 * <literal>&amp;quot;</literal> respectively.
 * ASCII NUL ('\0') or any other character will not be escaped.
 * 
 * If @buffer is NULL, no work is done but the size of buffer
 * required is returned.  The output in buffer remains in UTF-8.
 *
 * If the input @string is empty, a single NUL will be written to the
 * buffer.
 *
 * Return value: the number of bytes required / used or <0 on failure.
 **/
int
raptor_xml_escape_string_any(raptor_world *world,
                             const unsigned char *string, size_t len,
                             unsigned char *buffer, size_t length,
                             char quote,
                             int xml_version)
{
  size_t l;
  size_t new_len = 0;
  const unsigned char *p;
  unsigned char *q;
  int unichar_len;
  raptor_unichar unichar;

  if(!string)
    return -1;
  
  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1);

  raptor_world_open(world);

  if(quote != '\"' && quote != '\'')
    quote='\0';

  for(l = len, p = string; l; p++, l--) {
    if(*p > 0x7f) {
      unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar);
      if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > l) {
        raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
                         "Bad UTF-8 encoding.");
        return -1;
      }
    } else {
      unichar=*p;
      unichar_len = 1;
    }
  
    if(unichar == '&')
      /* &amp; */
      new_len+= 5;
    else if(unichar == '<' || (!quote && unichar == '>'))
      /* &lt; or &gt; */
      new_len+= 4;
    else if(quote && unichar == (unsigned long)quote)
      /* &apos; or &quot; */
      new_len+= 6;
    else if(unichar == 0x0d ||
             (quote && (unichar == 0x09 || unichar == 0x0a)))
      /* &#xD; or &#x9; or &xA; */
      new_len+= 5;
    else if(unichar == 0x7f ||
             (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) {
      if(!unichar || xml_version < 11) {
        raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
                                   "Cannot write illegal XML 1.0 character U+%6lX.",
                                   unichar);
      } else {
        /* &#xX; */
        new_len+= 5;
        if(unichar > 0x0f)
          new_len++;
      }
    } else
      new_len+= unichar_len;

    unichar_len--; /* since loop does len-- */
    p += unichar_len; l -= unichar_len;
  }

  if(length && new_len > length)
    return 0;

  if(!buffer)
    return RAPTOR_BAD_CAST(int, new_len);
  
  for(l = len, p = string, q = buffer; l; p++, l--) {
    if(*p > 0x7f) {
      unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar);
      /* if the UTF-8 encoding is bad, we already did return -1 above */
    } else {
      unichar=*p;
      unichar_len = 1;
    }

    if(unichar == '&') {
      memcpy(q, "&amp;", 5);
      q+= 5;
    } else if(unichar == '<') {
      memcpy(q, "&lt;", 4);
      q+= 4;
    } else if(!quote && unichar == '>') {
      memcpy(q, "&gt;", 4);
      q+= 4;
    } else if(quote && unichar == (unsigned long)quote) {
      if(quote == '\'')  
        memcpy(q, "&apos;", 6);
      else
        memcpy(q, "&quot;", 6);
      q+= 6;
    } else if(unichar == 0x0d ||
               (quote && (unichar == 0x09 || unichar == 0x0a))) {
      /* &#xX; */
      *q++='&';
      *q++='#';
      *q++='x';
      if(unichar == 0x09)
        *q++ = '9';
      else
        *q++ = 'A'+ ((char)unichar-0x0a);
      *q++= ';';
    } else if(unichar == 0x7f ||
               (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) {
      if(!unichar || xml_version < 11) {
        raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
                                   "Cannot write illegal XML 1.0 character U+%6lX.",
                                   unichar);
      } else {
        /* &#xX; */
        *q++ = '&';
        *q++ = '#';
        *q++ = 'x';
        q += raptor_format_integer((char*)q, 3, 
                                   RAPTOR_GOOD_CAST(unsigned int, unichar), 
                                   /* base */ 16, -1, '\0');
        *q++ = ';';
      }
    } else {
      /* coverity[negative_returns]
       * negative unichar_len values are checked and cause return -1 above */
      memcpy(q, p, unichar_len);
      q+= unichar_len;
    }

    unichar_len--; /* since loop does len-- */
    p += unichar_len; l -= unichar_len;
  }