Пример #1
0
/*
 * Return a newly allocated, NUL-terminated copy of buf in which every byte
 * that u8_check() reports as the start of an invalid sequence has been
 * replaced by the three-byte UTF-8 encoding of U+FFFD (EF BF BD).
 *
 * buf must be a NUL-terminated string. The result is obtained from xmalloc()
 * and is owned by the caller.
 */
static char *sanitize_utf8(const char *buf)
{
	const size_t len = strlen(buf);
	const char *ptr = buf;

	// Count how many errors we encounter
	uint32_t i = 0;
	// Upper bounds to ensure termination even if u8_check is unsafe
	while (i < len && ptr < buf + len) {
		ptr = (const char*)u8_check((const uint8_t*)ptr, (size_t)(buf + len - ptr));
		if (ptr == NULL) {
			break;
		}

		assert(ptr >= buf);
		assert(ptr < buf + len);

		// Skip the offending byte and keep scanning behind it
		ptr++;
		i++;
	}

	// i is the total number of errors. Each replaced byte grows the string
	// by 2 bytes (1 byte in, 3 bytes out).
	const size_t final_len = len + (size_t)i*2;
	char *safe_buf = xmalloc(final_len + 1);
	// Copy the terminator as well, so safe_buf is a valid C string from the
	// start.
	memcpy(safe_buf, buf, len + 1);

	// Current length of the working string (without the terminator)
	size_t safe_len = len;
	// Everything before scan is already known to be valid
	char *scan = safe_buf;

	// Fix exactly i errors
	for (uint32_t j = 0; j < i; j++) {
		char *safe_ptr = (char*)u8_check((const uint8_t*)scan,
			safe_len - (size_t)(scan - safe_buf));

		// This implies we had less errors than we should.
		assert(safe_ptr != NULL);
		assert(safe_ptr >= safe_buf);
		assert(safe_ptr < safe_buf + safe_len);

		// Shift everything after the bad byte, terminator included, by
		// 2 bytes. Source and destination overlap, hence memmove.
		memmove(safe_ptr + 3, safe_ptr + 1,
			safe_len - (size_t)(safe_ptr - safe_buf));

		// UTF8 replacement rune
		safe_ptr[0] = (char)0xef;
		safe_ptr[1] = (char)0xbf;
		safe_ptr[2] = (char)0xbd;

		safe_len += 2;
		// The replacement rune itself is valid; resume right after it
		scan = safe_ptr + 3;
	}

	// We now have a valid utf8 string
	assert(u8_check((const uint8_t*)safe_buf, safe_len) == NULL);
	// We should be null terminated
	assert(safe_buf[final_len] == '\0');
	// We should be the right length
	assert(strlen(safe_buf) == final_len);

	return safe_buf;
}
Пример #2
0
// Lower-case and NFKC-normalize a UTF-8 word with libunistring.
//
// localword: the word, expected to be UTF-8 encoded.
// lang:      language code passed straight to u8_tolower().
//
// Returns the converted word. If the input is not valid UTF-8, or the
// conversion fails, a message is written to cerr and an empty string is
// returned.
string downstring(string localword, string lang) {
  // Get string length
  size_t length = localword.size();
  // create correct type for c-style unicode string
  const uint8_t * word = (const uint8_t*)localword.c_str();
  // Scratch buffer offered to u8_tolower; it is only used when the result
  // fits, see below.
  uint8_t output[200];
  // In: capacity of output. Out: length of the converted string.
  size_t outLength = sizeof(output);

  if (u8_check(word, length)) {
    cerr << endl << "Invalid UTF-8 in word: "<< word << " : Dropping it." << endl;
    return(string(""));
  }

  // u8_tolower returns the buffer that actually holds the result: output if
  // the result fits, otherwise a freshly malloc'ed block. Always read from the
  // returned pointer, never from output directly.
  uint8_t * lowered = u8_tolower(word, length, lang.c_str(), UNINORM_NFKC, output, &outLength);
  if (!lowered)  {
    cerr << endl << "Error during lowercase conversion for word : "<< word << " : Dropping it." << endl;
    return(string(""));
  }

  // Build a C++ string from the begin/end pointers of the C-style result.
  string result((const char *)lowered, (const char *)lowered + outLength);

  // Release the heap block libunistring allocated when output was too small.
  if (lowered != output) {
    free(lowered);
  }
  return(result);
}
Пример #3
0
/*
 * Make sure a string is usable as UTF-8.
 *
 * - NULL in, NULL out.
 * - If str already passes u8_check() it is returned as is, except that a
 *   leading UTF-8 byte-order mark (EF BB BF) is removed in place.
 * - Otherwise str is converted from fromcode with u8_strconv_from_encoding()
 *   (handler iconveh_question_mark) and the converted string is returned
 *   instead of str; on conversion failure the error is logged and NULL is
 *   returned.
 */
char *
unicode_fixup_string(char *str, const char *fromcode)
{
  uint8_t *converted;
  size_t nbytes;

  if (str == NULL)
    return NULL;

  nbytes = strlen(str);

  if (u8_check((uint8_t *)str, nbytes) != NULL)
    {
      /* Not valid UTF-8: treat the bytes as fromcode and transcode them */
      converted = u8_strconv_from_encoding(str, fromcode, iconveh_question_mark);
      if (converted != NULL)
	return (char *)converted;

      DPRINTF(E_LOG, L_MISC, "Could not convert string '%s' to UTF-8: %s\n", str, strerror(errno));
      return NULL;
    }

  /* Already valid UTF-8; drop a leading byte-order mark, moving the
   * terminator along with the rest of the string */
  if (nbytes >= 3 && memcmp("\xef\xbb\xbf", str, 3) == 0)
    memmove(str, str + 3, nbytes - 3 + 1);

  return str;
}
Пример #4
0
/*
 * Boolean wrapper around u8_check(): returns true when u8_check() reports no
 * invalid unit in the n bytes at s (i.e. it returns NULL), false otherwise.
 */
bool sss_utf8_check(const uint8_t *s, size_t n)
{
    return u8_check(s, n) == NULL;
}
Пример #5
0
/* A tricky optimization, but probably worth it.

   Hashes the LEN bytes at STR as UTF-8 text by decoding code points and
   feeding them, three at a time, into the a/b/c state below.  LEN may be
   (size_t) -1, in which case STR is measured with strlen.  If the bytes
   are not valid UTF-8 the work is delegated to scm_i_string_hash on a
   string built by scm_from_utf8_stringn.

   NOTE(review): the end of this function (everything after the call to
   final) is not visible here.  */
unsigned long 
scm_i_utf8_string_hash (const char *str, size_t len)
{
  const scm_t_uint8 *end, *ustr = (const scm_t_uint8 *) str;
  unsigned long ret;

  /* The length of the string in characters.  This name corresponds to
     Jenkins' original name.  */
  size_t length;

  scm_t_uint32 a, b, c, u32;

  /* (size_t) -1 is the "NUL-terminated, measure it yourself" sentinel.  */
  if (len == (size_t) -1)
    len = strlen (str);

  end = ustr + len;

  if (u8_check (ustr, len) != NULL)
    /* Invalid UTF-8; punt.  */
    return scm_i_string_hash (scm_from_utf8_stringn (str, len));

  /* The input was validated above, which is what the _unsafe decoder
     calls below depend on.  */
  length = u8_strnlen (ustr, len);

  /* Set up the internal state.  */
  a = b = c = 0xdeadbeef + ((scm_t_uint32)(length<<2)) + 47;

  /* Handle most of the key.  */
  while (length > 3)
    {
      /* Each call decodes one code point into u32 and returns the number
         of bytes to advance.  */
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      a += u32;
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      b += u32;
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      c += u32;
      mix (a, b, c);
      length -= 3;
    }

  /* Handle the last 3 elements's.  */
  /* NOTE(review): for an empty input length is 0 here; one code point is
     still decoded and --length wraps around (size_t), so the nested
     branches run too.  Confirm this is intended/safe for length == 0.  */
  ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
  a += u32;
  if (--length)
    {
      ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
      b += u32;
      if (--length)
        {
          ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr);
          c += u32;
        }
    }

  final (a, b, c);
Пример #6
0
/* Guesses whether the N bytes of text starting at DATA are UTF-8.  DATA
   should start with the first non-ASCII text character (as determined by
   encoding_guess_is_ascii_text()) found in the input.

   Return value:

       -1: the input contains only ASCII characters.  (It may be treated as
       UTF-8, since ASCII is a subset of UTF-8.)

       0: the encoding is definitely not UTF-8, because the input contains
       byte sequences that are not valid in UTF-8.

       1: the encoding appears to be UTF-8, because the input contains valid
       UTF-8 multibyte sequences.

   N must be at least ENCODING_GUESS_MIN, unless the file has fewer bytes than
   that starting with the first non-ASCII text character.

   See encoding-guesser.h for intended use of this function. */
int
encoding_guess_tail_is_utf8 (const void *data, size_t n)
{
  /* Nothing but ASCII bytes: report plain ASCII. */
  if (encoding_guess_count_ascii (data, n) == n)
    return -1;

  /* A short tail is validated in full with u8_check. */
  if (n < ENCODING_GUESS_MIN)
    return u8_check (data, n) == NULL;

  /* Otherwise defer to is_all_utf8_text. */
  return is_all_utf8_text (data, n);
}
/* Converts the NUL-terminated UTF-8 string STRING to the encoding TOCODE.

   Returns a freshly malloc'ed, NUL-terminated string, or NULL with errno set:
     ENOMEM  the copy for the UTF-8 shortcut could not be allocated,
     EILSEQ  the input failed u8_check (only with CONFIG_UNICODE_SAFETY), or
             the converted result does not have exactly one NUL byte, at the
             end.
   When mem_iconveha fails, NULL is returned and errno is left as that call
   set it.  HANDLER is forwarded to mem_iconveha.  */
char *
u8_strconv_to_encoding (const uint8_t *string,
                        const char *tocode,
                        enum iconv_ilseq_handler handler)
{
  if (STRCASEEQ (tocode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
      /* UTF-8 to UTF-8: a plain copy, terminator included, without iconv().  */
      size_t nbytes = u8_strlen (string) + 1;
      char *copy;

#if CONFIG_UNICODE_SAFETY
      if (u8_check (string, nbytes))
        {
          errno = EILSEQ;
          return NULL;
        }
#endif
      copy = (char *) malloc (nbytes);
      if (copy == NULL)
        {
          errno = ENOMEM;
          return NULL;
        }
      memcpy (copy, (const char *) string, nbytes);
      return copy;
    }

  {
    /* Any other target encoding: convert the string together with its
       terminating NUL.  */
    char *converted = NULL;
    size_t converted_len = 0;

    if (mem_iconveha ((const char *) string, u8_strlen (string) + 1,
                      "UTF-8", tocode,
                      handler == iconveh_question_mark, handler,
                      NULL, &converted, &converted_len) < 0)
      return NULL;

    /* Reject a result that is empty, not NUL-terminated, or that contains
       an embedded NUL.  */
    if (converted_len == 0
        || converted[converted_len - 1] != '\0'
        || strlen (converted) != converted_len - 1)
      {
        free (converted);
        errno = EILSEQ;
        return NULL;
      }
    return converted;
  }
}
Пример #8
0
/* Converts the SRCLEN bytes of UTF-8 text at SRC to the encoding TOCODE.

   RESULTBUF / *LENGTHP describe an optional caller-supplied output buffer
   and its size.  The return value is the buffer holding the result (either
   RESULTBUF or a malloc'ed block) and *LENGTHP is set to the result length.
   On failure NULL is returned; errno is EILSEQ for invalid input (only with
   CONFIG_UNICODE_SAFETY), ENOMEM for allocation failure, or whatever
   mem_iconveha left behind.

   HANDLER and OFFSETS are forwarded to mem_iconveha; the UTF-8 to UTF-8
   shortcut below does not use them.  */
char *
u8_conv_to_encoding (const char *tocode,
                     enum iconv_ilseq_handler handler,
                     const uint8_t *src, size_t srclen,
                     size_t *offsets,
                     char *resultbuf, size_t *lengthp)
{
  if (STRCASEEQ (tocode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
      char *result;

      /* Conversion from UTF-8 to UTF-8.  No need to go through iconv().  */
#if CONFIG_UNICODE_SAFETY
      if (u8_check (src, srclen))
        {
          errno = EILSEQ;
          return NULL;
        }
#endif

      /* Memory allocation.  */
      if (resultbuf != NULL && *lengthp >= srclen)
        result = resultbuf;
      else
        {
          /* Never ask malloc for 0 bytes, so that NULL always means
             failure.  */
          result = (char *) malloc (srclen > 0 ? srclen : 1);
          if (result == NULL)
            {
              errno = ENOMEM;
              return NULL;
            }
        }

      /* Skip the copy for empty input: SRC may legitimately be NULL then,
         and passing NULL to memcpy is undefined even with a zero size.  */
      if (srclen > 0)
        memcpy (result, (const char *) src, srclen);
      *lengthp = srclen;
      return result;
    }
  else
    {
      char *result = resultbuf;
      size_t length = *lengthp;

      if (mem_iconveha ((const char *) src, srclen,
                        "UTF-8", tocode,
                        handler == iconveh_question_mark, handler,
                        offsets, &result, &length) < 0)
        return NULL;

      if (result == NULL) /* when (resultbuf == NULL && length == 0)  */
        {
          /* Return a non-NULL pointer even for an empty result, so that
             callers can tell it apart from failure.  */
          result = (char *) malloc (1);
          if (result == NULL)
            {
              errno = ENOMEM;
              return NULL;
            }
        }
      *lengthp = length;
      return result;
    }
}
Пример #9
0
/* Converts the SRCLEN bytes at SRC from the encoding FROMCODE to UTF-8.

   RESULTBUF / *LENGTHP describe an optional caller-supplied output buffer
   and its size.  The return value is the buffer holding the result (either
   RESULTBUF or a malloc'ed block) and *LENGTHP is set to the result length.
   On failure NULL is returned; errno is EILSEQ for invalid UTF-8 input in
   the UTF-8 shortcut, ENOMEM for allocation failure, or whatever
   mem_iconveha left behind.

   If OFFSETS is non-NULL, the UTF-8 shortcut fills OFFSETS[0..SRCLEN-1]:
   the entry for the first byte of each character is that byte's index, the
   entries for its continuation bytes are (size_t)(-1).  In the general case
   OFFSETS and HANDLER are forwarded to mem_iconveha.  */
uint8_t *
u8_conv_from_encoding (const char *fromcode,
                       enum iconv_ilseq_handler handler,
                       const char *src, size_t srclen,
                       size_t *offsets,
                       uint8_t *resultbuf, size_t *lengthp)
{
    if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
        /* Conversion from UTF-8 to UTF-8.  No need to go through iconv().  */
        uint8_t *result;

        if (u8_check ((const uint8_t *) src, srclen))
        {
            errno = EILSEQ;
            return NULL;
        }

        if (offsets != NULL)
        {
            size_t i;

            for (i = 0; i < srclen; )
            {
                int count = u8_mblen ((const uint8_t *) src + i, srclen - i);
                /* We can rely on count > 0 because of the previous u8_check.  */
                if (count <= 0)
                    abort ();
                /* Input and output are identical, so a character's output
                   offset equals its input offset.  */
                offsets[i] = i;
                i++;
                while (--count > 0)
                    offsets[i++] = (size_t)(-1);
            }
        }

        /* Memory allocation.  */
        if (resultbuf != NULL && *lengthp >= srclen)
            result = resultbuf;
        else
        {
            /* Never ask malloc for 0 bytes, so that NULL always means
               failure.  */
            result = (uint8_t *) malloc (srclen > 0 ? srclen : 1);
            if (result == NULL)
            {
                errno = ENOMEM;
                return NULL;
            }
        }

        /* Skip the copy for empty input: SRC may legitimately be NULL then,
           and passing NULL to memcpy is undefined even with a zero size.  */
        if (srclen > 0)
            memcpy ((char *) result, src, srclen);
        *lengthp = srclen;
        return result;
    }
    else
    {
        char *result = (char *) resultbuf;
        size_t length = *lengthp;

        if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler,
                          offsets, &result, &length) < 0)
            return NULL;

        if (result == NULL) /* when (resultbuf == NULL && length == 0)  */
        {
            /* Return a non-NULL pointer even for an empty result, so that
               callers can tell it apart from failure.  */
            result = (char *) malloc (1);
            if (result == NULL)
            {
                errno = ENOMEM;
                return NULL;
            }
        }
        *lengthp = length;
        return (uint8_t *) result;
    }
}
/* Test driver for u8_check.

   Apart from the first two cases, every input starts with the same valid
   4-byte prefix "\320\224\320\260" followed by the sequence under test.
   A case therefore expects either NULL (the whole input is valid) or
   input + 4 (the first byte after the prefix is reported).

   In the three "invalid bytes in N-byte character" groups the first case is
   the well-formed baseline (expects NULL); the cases after it corrupt one
   continuation byte each.

   Returns 0 when control reaches the end; what a failing ASSERT does is
   defined by that macro, outside this block.  */
int
main ()
{
  /* Test empty string.  */
  {
    static const uint8_t input[] = "";
    ASSERT (u8_check (input, 0) == NULL);
  }

  /* Test valid non-empty string.  */
  {
    static const uint8_t input[] = /* "Данило Шеган" (the Cyrillic text the bytes below decode to) */
      "\320\224\320\260\320\275\320\270\320\273\320\276 \320\250\320\265\320\263\320\260\320\275";
    /* sizeof - 1: do not hand the literal's terminating NUL to u8_check.  */
    ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
  }

  /* Test out-of-range character with 4 bytes: U+110000.  */
  {
    static const uint8_t input[] = "\320\224\320\260\364\220\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test out-of-range character with 5 bytes: U+200000.  */
  {
    static const uint8_t input[] = "\320\224\320\260\370\210\200\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test out-of-range character with 6 bytes: U+4000000.  */
  {
    static const uint8_t input[] = "\320\224\320\260\374\204\200\200\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test invalid lead byte.  */
  {
    static const uint8_t input[] = "\320\224\320\260\376\200\200\200\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\377\200\200\200\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test overlong 2-byte character.  */
  {
    static const uint8_t input[] = "\320\224\320\260\301\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test overlong 3-byte character.  */
  {
    static const uint8_t input[] = "\320\224\320\260\340\200\277";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test overlong 4-byte character.  */
  {
    static const uint8_t input[] = "\320\224\320\260\360\200\277\277";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test invalid bytes in 2-byte character.  */
  /* Baseline: well-formed 2-byte sequence, expected to be accepted.  */
  {
    static const uint8_t input[] = "\320\224\320\260\302\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\302\100";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\302\300";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test invalid bytes in 3-byte character.  */
  /* Baseline: well-formed 3-byte sequence, expected to be accepted.  */
  {
    static const uint8_t input[] = "\320\224\320\260\342\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\342\100\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\342\300\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\342\200\100";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\342\200\300";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test invalid bytes in 4-byte character.  */
  /* Baseline: well-formed 4-byte sequence, expected to be accepted.  */
  {
    static const uint8_t input[] = "\320\224\320\260\362\200\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\362\100\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\362\300\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\362\200\100\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\362\200\300\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\362\200\200\100";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\362\200\200\300";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test truncated/incomplete 2-byte character.  */
  {
    static const uint8_t input[] = "\320\224\320\260\302";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test truncated/incomplete 3-byte character.  */
  {
    static const uint8_t input[] = "\320\224\320\260\342\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test truncated/incomplete 4-byte character.  */
  {
    static const uint8_t input[] = "\320\224\320\260\362\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test missing lead byte.  */
  {
    static const uint8_t input[] = "\320\224\320\260\200\200\200\200\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  /* Test surrogate codepoints.  */
  {
    static const uint8_t input[] = "\320\224\320\260\355\240\200\355\260\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }
  {
    static const uint8_t input[] = "\320\224\320\260\355\260\200";
    ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
  }

  return 0;
}
Пример #11
0
/**
 * gnutls_utf8_password_normalize:
 * @password: contain the UTF-8 formatted password
 * @plen: the length of the provided password
 * @out: the result in an null-terminated allocated string
 * @flags: zero, or %GNUTLS_UTF8_IGNORE_ERRS
 *
 * This function will convert the provided UTF-8 password according
 * to the normalization rules in RFC7613.
 *
 * An empty password (@plen of zero) yields an allocated empty string.
 *
 * If the flag %GNUTLS_UTF8_IGNORE_ERRS is specified, any UTF-8 encoding
 * errors will be ignored, and in that case the output will be a copy of the input.
 * The same raw copy is produced, with that flag, when the decoded password
 * is rejected by the free-form class check.
 *
 * Returns: %GNUTLS_E_INVALID_UTF8_STRING on invalid UTF-8 data, or 0 on success.
 * Other negative error codes set in the body include %GNUTLS_E_MEMORY_ERROR,
 * %GNUTLS_E_PARSING_ERROR and %GNUTLS_E_INVALID_PASSWORD_STRING.
 *
 * Since: 3.5.7
 **/
int gnutls_utf8_password_normalize(const unsigned char *password, unsigned plen,
				   gnutls_datum_t *out, unsigned flags)
{
	size_t ucs4_size = 0, nrm_size = 0;
	size_t final_size = 0;
	uint8_t *final = NULL;
	uint32_t *ucs4 = NULL;
	uint32_t *nrm = NULL;
	uint8_t *nrmu8 = NULL;
	int ret;

	/* empty input: hand back an allocated "" without any conversion */
	if (plen == 0) {
		out->data = (uint8_t*)gnutls_strdup("");
		out->size = 0;
		if (out->data == NULL)
			return gnutls_assert_val(GNUTLS_E_MEMORY_ERROR);
		return 0;
	}

	/* check for invalid UTF-8 */
	if (u8_check((uint8_t*)password, plen) != NULL) {
		gnutls_assert();
		if (flags & GNUTLS_UTF8_IGNORE_ERRS) {
			/* raw_copy is also the target of a goto further down:
			 * it returns the input bytes unchanged, NUL-terminated */
 raw_copy:
			out->data = gnutls_malloc(plen+1);
			if (out->data == NULL)
				return gnutls_assert_val(GNUTLS_E_MEMORY_ERROR);
			out->size = plen;
			memcpy(out->data, password, plen);
			out->data[plen] = 0;
			return 0;
		} else {
			return GNUTLS_E_INVALID_UTF8_STRING;
		}
	}

	/* convert to UTF-32 */
	ucs4 = u8_to_u32((uint8_t*)password, plen, NULL, &ucs4_size);
	if (ucs4 == NULL) {
		gnutls_assert();
		ret = GNUTLS_E_PARSING_ERROR;
		goto fail;
	}

	ret = check_for_valid_freeformclass(ucs4, ucs4_size);
	if (ret < 0) {
		gnutls_assert();
		if (flags & GNUTLS_UTF8_IGNORE_ERRS) {
			/* release ucs4 here: raw_copy returns directly and
			 * does not pass through the fail label */
			free(ucs4);
			goto raw_copy;
		}
		/* report a password-specific error code to the caller */
		if (ret == GNUTLS_E_INVALID_UTF8_STRING)
			ret = GNUTLS_E_INVALID_PASSWORD_STRING;
		goto fail;
	}

	/* normalize to NFC */
	nrm = u32_normalize(UNINORM_NFC, ucs4, ucs4_size, NULL, &nrm_size);
	if (nrm == NULL) {
		gnutls_assert();
		ret = GNUTLS_E_INVALID_PASSWORD_STRING;
		goto fail;
	}

	/* convert back to UTF-8 */
	final_size = 0;
	nrmu8 = u32_to_u8(nrm, nrm_size, NULL, &final_size);
	if (nrmu8 == NULL) {
		gnutls_assert();
		ret = GNUTLS_E_INVALID_PASSWORD_STRING;
		goto fail;
	}

	/* copy to output with null terminator */
	final = gnutls_malloc(final_size+1);
Пример #12
0
/*
 * Advance @iter to the next field of the BSON buffer it iterates over.
 *
 * The iterator state (buffer, length, offset, key, type and the two value
 * pointers) is read from iter->user_data1..7, and on success the new
 * offset, key, type and value pointers are written back. The stored offset
 * is the index of the last byte consumed so far.
 *
 * Returns TRUE when a field was decoded. Returns FALSE at the end of the
 * document or when the data does not fit the buffer or fails validation;
 * in that case @iter is zeroed.
 *
 * For a UTF-8 string value that fails validation (and ITER_TRUST_UTF8 is
 * not set), the string is cut at the first invalid byte by writing a NUL
 * into the buffer, ITER_INVALID_UTF8 is set in iter->flags and TRUE is
 * still returned.
 */
gboolean
mongo_bson_iter_next (MongoBsonIter *iter)
{
   const guint8 *rawbuf;
   gsize rawbuf_len;
   gsize offset;
   const gchar *key;
   MongoBsonType type;
   const guint8 *value1;
   const guint8 *value2;
   const gchar *end = NULL;
   guint32 max_len;

   g_return_val_if_fail(iter != NULL, FALSE);

   /*
    * Copy values onto stack from iter.
    */
   rawbuf = iter->user_data1;
   rawbuf_len = GPOINTER_TO_SIZE(iter->user_data2);
   offset = GPOINTER_TO_SIZE(iter->user_data3);
   key = (const gchar *)iter->user_data4;
   type = GPOINTER_TO_INT(iter->user_data5);
   value1 = (const guint8 *)iter->user_data6;
   value2 = (const guint8 *)iter->user_data7;

   /*
    * Unset the invalid utf8 field.
    */
   iter->flags &= ~ITER_INVALID_UTF8;

   /*
    * Check for end of buffer.
    */
   if ((offset + 1) >= rawbuf_len) {
      GOTO(failure);
   }

   /*
    * Get the type of the next field.
    */
   if (!(type = rawbuf[++offset])) {
      /*
       * This is the end of the iterator.
       */
      GOTO(failure);
   }

   /*
    * The key has to start inside the buffer. Without this check a non-zero
    * type in the very last byte would make the bound handed to first_nul()
    * below (rawbuf_len - offset - 1) wrap around.
    */
   if ((offset + 1) >= rawbuf_len) {
      GOTO(failure);
   }

   /*
    * Get the key of the next field.
    */
   key = (const gchar *)&rawbuf[++offset];
   max_len = first_nul(key, rawbuf_len - offset - 1);
   if (!(iter->flags & ITER_TRUST_UTF8)) {
      if (!g_utf8_validate(key, max_len, &end)) {
         GOTO(failure);
      }
   }
   /*
    * NOTE(review): strlen() is not bounded by rawbuf_len. The REGEX branch
    * advances by the first_nul() result instead; confirm what first_nul()
    * returns when no NUL is found before switching this line over.
    */
   offset += strlen(key) + 1;

   switch (type) {
   case MONGO_BSON_UTF8:
      if ((offset + 5) < rawbuf_len) {
         value1 = &rawbuf[offset];
         offset += 4;
         value2 = &rawbuf[offset];
         /*
          * Read the length with memcpy, as the document/array case does:
          * value1 is not necessarily aligned for a guint32 load.
          */
         memcpy(&max_len, value1, sizeof max_len);
         max_len = GUINT32_FROM_LE(max_len);
         /*
          * max_len counts the string bytes including the trailing NUL, so
          * it must be at least 1 (otherwise max_len - 1 wraps around) and
          * all of it must lie inside the buffer. The check above
          * guarantees offset < rawbuf_len, so the subtraction cannot wrap.
          */
         if ((max_len >= 1) && (max_len <= (rawbuf_len - offset))) {
            if (!(iter->flags & ITER_TRUST_UTF8)) {
               if ((end = (char *)u8_check((guint8 *)value2, max_len - 1))) {
                  /*
                   * Well, we have quite the dilemma here. The UTF-8 string
                   * is invalid, but there was definitely a key here.
                   * Consumers might need to get at data after this too. So
                   * the best we can do is probably set the value to as long
                   * of a valid utf-8 string as we can. We will simply NUL
                   * the buffer at the given error offset.
                   */
                  *(gchar *)end = '\0';
                  offset += max_len - 1;
                  iter->flags |= ITER_INVALID_UTF8;
                  GOTO(success);
               }
            }
            offset += max_len - 1;
            if (value2[max_len - 1] == '\0') {
               GOTO(success);
            }
         }
      }
      GOTO(failure);
   case MONGO_BSON_DOCUMENT:
   case MONGO_BSON_ARRAY:
      if ((offset + 5) < rawbuf_len) {
         value1 = &rawbuf[offset];
         value2 = NULL;
         memcpy(&max_len, value1, sizeof max_len);
         max_len = GUINT32_FROM_LE(max_len);
         if ((offset + max_len) <= rawbuf_len) {
            offset += max_len - 1;
            GOTO(success);
         }
      }
      GOTO(failure);
   case MONGO_BSON_NULL:
   case MONGO_BSON_UNDEFINED:
      /*
       * No payload: step back so that offset again names the last byte
       * consumed (the key's terminator).
       */
      value1 = NULL;
      value2 = NULL;
      offset--;
      GOTO(success);
   case MONGO_BSON_OBJECT_ID:
      if ((offset + 12) < rawbuf_len) {
         value1 = &rawbuf[offset];
         value2 = NULL;
         offset += 11;
         GOTO(success);
      }
      GOTO(failure);
   case MONGO_BSON_BOOLEAN:
      if ((offset + 1) < rawbuf_len) {
         value1 = &rawbuf[offset];
         value2 = NULL;
         GOTO(success);
      }
      GOTO(failure);
   case MONGO_BSON_DATE_TIME:
   case MONGO_BSON_DOUBLE:
   case MONGO_BSON_INT64:
      if ((offset + 8) < rawbuf_len) {
         value1 = &rawbuf[offset];
         value2 = NULL;
         offset += 7;
         GOTO(success);
      }
      GOTO(failure);
   case MONGO_BSON_REGEX:
      /*
       * Two consecutive NUL-terminated strings: value1 is the first,
       * value2 the second.
       */
      value1 = &rawbuf[offset];
      max_len = first_nul((gchar *)value1, rawbuf_len - offset - 1);
      if (!(iter->flags & ITER_TRUST_UTF8)) {
         if (!g_utf8_validate((gchar *)value1, max_len, &end)) {
            GOTO(failure);
         }
      }
      offset += max_len + 1;
      if ((offset + 1) >= rawbuf_len) {
         GOTO(failure);
      }
      value2 = &rawbuf[offset];
      max_len = first_nul((gchar *)value2, rawbuf_len - offset - 1);
      if (!(iter->flags & ITER_TRUST_UTF8)) {
         if (!g_utf8_validate((gchar *)value2, max_len, &end)) {
            GOTO(failure);
         }
      }
      offset += max_len + 1;
      GOTO(success);
   case MONGO_BSON_INT32:
      if ((offset + 4) < rawbuf_len) {
         value1 = &rawbuf[offset];
         value2 = NULL;
         offset += 3;
         GOTO(success);
      }
      GOTO(failure);
   default:
      g_warning("Unknown type: %d key: %s", type, key);
      GOTO(failure);
   }

success:
   /*
    * Publish the new position and field description to the iterator.
    */
   iter->user_data3 = GSIZE_TO_POINTER(offset);
   iter->user_data4 = (gpointer)key;
   iter->user_data5 = GINT_TO_POINTER(type);
   iter->user_data6 = (gpointer)value1;
   iter->user_data7 = (gpointer)value2;
   return TRUE;

failure:
   /*
    * Wipe the whole iterator so it cannot be advanced any further.
    */
   memset(iter, 0, sizeof *iter);
   return FALSE;
}
Пример #13
-1
/*
 * Make sure a string is usable as UTF-8.
 *
 * - NULL in, NULL out.
 * - If str already passes u8_check() it is returned unchanged.
 * - Otherwise str is treated as "ascii" and converted with
 *   u8_strconv_from_encoding() (handler iconveh_question_mark); the
 *   converted string is returned instead of str. On conversion failure the
 *   error is logged and NULL is returned.
 */
char *
unicode_fixup_string(char *str)
{
  uint8_t *ret;
  size_t len;

  if (!str)
    return NULL;

  len = strlen(str);

  /* String is valid UTF-8 */
  if (!u8_check((uint8_t *)str, len))
    return str;

  /* Use the string-level converter: it returns a NUL-terminated result.
   * The length-based u8_conv_from_encoding() converts exactly len bytes and
   * does not append a terminator, so its result is not a C string. */
  ret = u8_strconv_from_encoding(str, "ascii", iconveh_question_mark);
  if (!ret)
    {
      DPRINTF(E_LOG, L_MISC, "Could not convert string '%s' to UTF-8: %s\n", str, strerror(errno));

      return NULL;
    }

  return (char *)ret;
}