C++ (Cpp) e_util_unicode_get_utf8 Examples

Example #1

0

Show file

File: hildon-helper.c Project: archlinuxarm-n900/libhildon

/**
 * hildon_helper_strip_string:
 * @string: a UTF-8 string to be stripped, or %NULL.
 *
 * Strips all capitalization and accentuation marks from a string.
 * The returned Unicode string is zero-terminated.
 *
 * If an illegal UTF-8 sequence is found in @string, the result is an
 * empty (zero-length) Unicode string rather than %NULL.
 *
 * Returns: a newly allocated Unicode, lowercase, and without accentuation
 * marks version of @string, or %NULL if @string is %NULL or an empty
 * string. Release it with g_free() when no longer needed.
 *
 * Since: 2.2.18
 **/
gunichar *
hildon_helper_strip_string (const gchar *string)
{
  gunichar *nuni;
  gint nlen = 0;
  gunichar unival;
  const gchar *p;

  /* Reject NULL before touching the bytes; the empty check is O(1). */
  if (string == NULL || *string == '\0')
    return NULL;

  /* Every decoded character consumes at least one input byte and stores
   * at most one output element, so the byte length plus one slot for
   * the terminator is always large enough. */
  nuni = g_malloc (sizeof (gunichar) * (strlen (string) + 1));

  for (p = e_util_unicode_get_utf8 (string, &unival);
       p && unival;
       p = e_util_unicode_get_utf8 (p, &unival)) {
      gunichar sc = stripped_char (unival);

      /* Characters that strip down to nothing are dropped. */
      if (sc) {
          nuni[nlen++] = sc;
      }
  }

  /* NULL means there was illegal utf-8 sequence: discard what was
   * collected and hand back an empty string. */
  if (!p)
    nlen = 0;

  nuni[nlen] = 0;

  return nuni;
}

Example #2

0

Show file

File: e-book-backend-sexp.c Project: gcampax/evolution-data-server

/* converts str into utf8 GString in lowercase;
 * returns NULL if str is invalid utf8 string otherwise
 * returns newly allocated GString
*/
static GString *
chars_to_unistring_lowercase (const gchar *pstr)
{
	GString *res;
	gunichar unich;
	gchar *p, *str;

	if (pstr == NULL)
		return NULL;

	str = e_util_utf8_remove_accents (pstr);
	if (!str)
		return NULL;

	res = g_string_new ("");

	for (p = e_util_unicode_get_utf8 (str, &unich); p && unich; p = e_util_unicode_get_utf8 (p, &unich)) {
		g_string_append_unichar (res, g_unichar_tolower (unich));
	}

	g_free (str);

	/* it was invalid unichar string */
	if (p == NULL) {
		g_string_free (res, TRUE);
		return NULL;
	}

	return res;
}

Example #3

0

Show file

File: hildon-helper.c Project: archlinuxarm-n900/libhildon

/**
 * get_next:
 * @p: the position in the string where decoding starts.
 * @o: a place to store the location of the character stored in @out.
 * @out: a place to store the stripped version of the decoded character.
 * @separators: if %TRUE, keep advancing until the stripped character is
 * alphanumeric (or the string ends or decoding fails); if %FALSE,
 * decode exactly one character.
 *
 * Decodes the next character that is valid in our search scope with
 * e_util_unicode_get_utf8(), strips it with stripped_char() and stores
 * the result in @out. The position that character was read from is
 * stored in @o.
 *
 * Returns: the value returned by the last e_util_unicode_get_utf8()
 * call, i.e. the point in the string where iteration should continue.
 **/
static const gchar *
get_next (const gchar *p, const gchar **o, gunichar *out, gboolean separators)
{
  gunichar decoded;

  for (;;) {
    *o = p;
    p = e_util_unicode_get_utf8 (p, &decoded);
    *out = stripped_char (decoded);

    /* Without separator skipping a single character is all we want. */
    if (!separators)
      break;

    /* Stop on failure, on the end of the string, or as soon as an
     * alphanumeric character has been found. */
    if (p == NULL || decoded == 0 || g_unichar_isalnum (*out))
      break;
  }

  return p;
}

Example #4

0

Show file

File: hildon-helper.c Project: archlinuxarm-n900/libhildon

/**
 * hildon_helper_utf8_strstrcasedecomp_needle_stripped:
 * @haystack: a haystack where to search
 * @nuni: a needle to search for, already stripped with hildon_helper_strip_string()
 *
 * Heavily modified version of e_util_utf8_strstrcasedecomp(). As its
 * original version, it finds the first occurrence of @nuni in
 * @haystack.  However, instead of stripping @nuni, it expect it to be
 * already stripped. See hildon_helper_strip_string().
 *
 * This is done for performance reasons, since this search is done
 * several times for the same string @nuni, it is undesired to strip
 * it more than once.
 *
 * Also, the search is done as a prefix search, starting in the first
 * alphanumeric character after any non-alphanumeric one. Searching
 * for "aba" in "Abasto" will match, searching in "Moraba" will not,
 * and searching in "A tool (abacus)" will do.
 *
 * Returns: the first instance of @nuni in @haystack
 *
 * Since: 2.2.18
 **/
const gchar *
hildon_helper_utf8_strstrcasedecomp_needle_stripped (const gchar *haystack, const gunichar *nuni)
{
  gunichar unival;
  gint nlen = 0;
  const gchar *o, *p;
  gunichar sc;

  if (haystack == NULL) return NULL;
  if (nuni == NULL) return NULL;
  if (strlen (haystack) == 0) return NULL;
  while (*(nuni + nlen) != 0) nlen++;

  if (nlen < 1) return haystack;

  for (p = get_next (haystack, &o, &sc, g_unichar_isalnum (nuni[0]));
       p && sc;
       p = get_next (p, &o, &sc, g_unichar_isalnum (nuni[0]))) {
    if (sc) {
      /* We have valid stripped gchar */
      if (sc == nuni[0]) {
        const gchar *q = p;
        gint npos = 1;
        while (npos < nlen) {
          q = e_util_unicode_get_utf8 (q, &unival);
          if (!q || !unival) return NULL;
          sc = stripped_char (unival);
          if ((!sc) || (sc != nuni[npos])) break;
          npos++;
        }
        if (npos == nlen) {
          return o;
        }
      }
    }
    while (p) {
      sc = g_utf8_get_char (p);
      if (!g_unichar_isalnum (sc))
        break;
      p = g_utf8_next_char (p);
    }
  }

  return NULL;
}

Example #5

0

Show file

File: e-book-backend-sexp.c Project: cwalkatron/evolution-data-server

/* Looks for the words of the list starting at 'word' inside s1.
 *
 * Every position of s1 is tried as a start for word->data (a GString).
 * When the whole word matches there and more words follow, the search
 * recurses for the remaining words starting one character after the
 * beginning of the match.
 *
 * Returns TRUE when 'word' is NULL (nothing left to match) or when the
 * word and all following ones were found; FALSE when s1 is NULL, when
 * a list node carries no data, or when no match exists.
*/
static gboolean
try_contains_word (const gchar *s1,
                   GSList *word)
{
	const gchar *start, *cursor;
	gunichar ch, first_ch;
	GString *needle;

	if (!s1)
		return FALSE;
	if (!word)
		return TRUE; /* previous was last word */
	if (!word->data)
		return FALSE; /* illegal structure */

	needle = word->data;
	first_ch = g_utf8_get_char (needle->str);

	start = s1;
	cursor = e_util_unicode_get_utf8 (start, &ch);
	while (cursor && ch) {
		if (ch == first_ch) {
			gunichar needle_ch;
			const gchar *hay = cursor;
			const gchar *ndl = e_util_unicode_get_utf8 (needle->str, &needle_ch);

			/* walk both strings in lockstep until they differ,
			 * one of them ends, or decoding fails */
			while (hay && ndl && ch && needle_ch) {
				hay = e_util_unicode_get_utf8 (hay, &ch);
				if (hay == NULL)
					break;
				ndl = e_util_unicode_get_utf8 (ndl, &needle_ch);
				if (ndl == NULL || ch != needle_ch)
					break;
			}

			if (needle_ch == 0 && ndl != NULL && hay != NULL) {
				/* we read whole word and no illegal character has been found */
				if (!word->next)
					return TRUE;
				if (try_contains_word (e_util_unicode_get_utf8 (start, &ch), word->next))
					return TRUE;
			}
		}

		start = cursor;
		cursor = e_util_unicode_get_utf8 (cursor, &ch);
	}

	return FALSE;
}

Example #6

0

Show file

File: e-book-backend-sexp.c Project: cwalkatron/evolution-data-server

/* Checks whether s2 can be found in s1. Both strings are normalized
 * with e_util_utf8_normalize() first; s2 is then broken into words and
 * the word list is handed to try_contains_word().
 *
 * The first space between words is treated as a wildcard character:
 * it ends the current word and is not stored. Any further consecutive
 * space is kept as part of the following word.
 *
 * The 'region' argument is not used by this function.
*/
static gboolean
contains_helper (const gchar *s1,
                 const gchar *s2,
                 const gchar *region)
{
	gchar *haystack, *needle;
	gchar *cursor;
	GSList *word_list = NULL;
	GString *current, *previous = NULL;
	gboolean seen_nonspace = FALSE;
	gboolean pending_space = FALSE;
	gboolean found;
	gunichar ch;
	glong haystack_len, needle_len;

	if (s2 == NULL)
		return FALSE;

	/* the initial word contains an empty string for sure */
	if (*s2 == '\0')
		return TRUE;

	haystack = e_util_utf8_normalize (s1);
	if (!haystack)
		return FALSE;

	needle = e_util_utf8_normalize (s2);
	if (!needle) {
		g_free (haystack);
		return FALSE;
	}

	haystack_len = g_utf8_strlen (haystack, -1);
	needle_len = g_utf8_strlen (needle, -1);
	if (haystack_len == 0 || needle_len == 0) {
		g_free (haystack);
		g_free (needle);

		/* only two empty strings match each other */
		return haystack_len == needle_len;
	}

	/* split the needle into words */
	current = g_string_new ("");
	cursor = e_util_unicode_get_utf8 (needle, &ch);
	while (cursor != NULL && ch != 0) {
		if (ch != ' ') {
			seen_nonspace = TRUE;
			pending_space = FALSE;
			g_string_append_unichar (current, ch);
		} else if (seen_nonspace && !pending_space) {
			/* only the first space after a nonspace character is
			 * a wildcard, so a new word starts here
			*/
			pending_space = TRUE;
			word_list = g_slist_append (word_list, current);
			previous = current;
			current = g_string_new ("");
		} else {
			g_string_append_unichar (current, ch);
		}

		cursor = e_util_unicode_get_utf8 (cursor, &ch);
	}

	if (!pending_space) {
		/* the word being built is a regular list member */
		word_list = g_slist_append (word_list, current);
	} else {
		/* the string ended with one or more spaces; glue what was
		 * collected since the wildcard onto the previous word
		*/
		g_string_append_len (previous, current->str, current->len);
		g_string_free (current, TRUE);
	}

	found = try_contains_word (haystack, word_list);

	g_free (haystack);
	g_free (needle);
	g_slist_foreach (word_list, contains_helper_free_word, NULL);
	g_slist_free (word_list);

	return found;
}