/* The input word in this method MUST be normalized in NFKD form,
 * and given in UTF-8, where str_length is the byte-length */
gboolean
tracker_parser_unaccent_nfkd_string (gpointer  str,
                                     gsize    *str_length)
{
	gchar *word;
	gsize word_length;
	gsize i;
	gsize j;

	g_return_val_if_fail (str != NULL, FALSE);
	g_return_val_if_fail (str_length != NULL, FALSE);
	g_return_val_if_fail (*str_length > 0, FALSE);

	word = (gchar *)str;
	word_length = *str_length;

	i = 0;
	j = 0;
	while (i < word_length) {
		ucs4_t unichar;
		gint utf8_len;

		/* Get next character of the word as UCS4 */
		utf8_len = u8_strmbtouc (&unichar, &word[i]);

		/* Invalid UTF-8 character or end of original string. */
		if (utf8_len <= 0) {
			break;
		}

		/* If the given unichar is a combining diacritical mark,
		 * just update the original index, not the output one */
		if (IS_CDM_UCS4 ((guint32) unichar)) {
			i += utf8_len;
			continue;
		}

		/* If already found a previous combining
		 * diacritical mark, indexes are different so
		 * need to copy characters. As output and input
		 * buffers may overlap, need to use memmove
		 * instead of memcpy */
		if (i != j) {
			memmove (&word[j], &word[i], utf8_len);
		}

		/* Update both indexes */
		i += utf8_len;
		j += utf8_len;
	}

	/* Force proper string end */
	word[j] = '\0';

	/* Set new output length */
	*str_length = j;

	return TRUE;
}
/* Copied from tracker/src/libtracker-fts/tracker-parser-glib.c under the GPL
 * And then from gnome-shell/src/shell-util.c
 *
 * Originally written by Aleksander Morgado <*****@*****.**>
 */
char *
cc_util_normalize_casefold_and_unaccent (const char *str)
{
  char *normalized, *tmp;
  int i = 0, j = 0, ilen;

  if (str == NULL)
    return NULL;

  normalized = g_utf8_normalize (str, -1, G_NORMALIZE_NFKD);
  tmp = g_utf8_casefold (normalized, -1);
  g_free (normalized);

  ilen = strlen (tmp);

  while (i < ilen)
    {
      gunichar unichar;
      gchar *next_utf8;
      gint utf8_len;

      /* Get next character of the word as UCS4 */
      unichar = g_utf8_get_char_validated (&tmp[i], -1);

      /* Invalid UTF-8 character or end of original string. */
      if (unichar == (gunichar) -1 ||
          unichar == (gunichar) -2)
        {
          break;
        }

      /* Find next UTF-8 character */
      next_utf8 = g_utf8_next_char (&tmp[i]);
      utf8_len = next_utf8 - &tmp[i];

      if (IS_CDM_UCS4 ((guint32) unichar))
        {
          /* If the given unichar is a combining diacritical mark,
           * just update the original index, not the output one */
          i += utf8_len;
          continue;
        }

      /* If already found a previous combining
       * diacritical mark, indexes are different so
       * need to copy characters. As output and input
       * buffers may overlap, need to use memmove
       * instead of memcpy */
      if (i != j)
        {
          memmove (&tmp[j], &tmp[i], utf8_len);
        }

      /* Update both indexes */
      i += utf8_len;
      j += utf8_len;
    }

  /* Force proper string end */
  tmp[j] = '\0';

  return tmp;
}