/** * hildon_helper_strip_string: * @string: a string to be stripped off. * * Strips all capitalization and accentuation marks from a string. * The returned Unicode string is %NULL-terminated. * * Returns: a newly allocated Unicode, lowercase, and without accentuation * marks version of @string, or %NULL if @string is an empty string. * * Since: 2.2.18 **/ gunichar * hildon_helper_strip_string (const gchar *string) { gunichar *nuni; gint nlen; gunichar unival; const gchar *p; if (strlen (string) == 0) return NULL; nuni = g_malloc (sizeof (gunichar) * (strlen (string) + 1)); nlen = 0; for (p = e_util_unicode_get_utf8 (string, &unival); p && unival; p = e_util_unicode_get_utf8 (p, &unival)) { gunichar sc; sc = stripped_char (unival); if (sc) { nuni[nlen++] = sc; } } /* NULL means there was illegal utf-8 sequence */ if (!p) nlen = 0; nuni[nlen] = 0; return nuni; }
/* converts str into utf8 GString in lowercase; * returns NULL if str is invalid utf8 string otherwise * returns newly allocated GString */ static GString * chars_to_unistring_lowercase (const gchar *pstr) { GString *res; gunichar unich; gchar *p, *str; if (pstr == NULL) return NULL; str = e_util_utf8_remove_accents (pstr); if (!str) return NULL; res = g_string_new (""); for (p = e_util_unicode_get_utf8 (str, &unich); p && unich; p = e_util_unicode_get_utf8 (p, &unich)) { g_string_append_unichar (res, g_unichar_tolower (unich)); } g_free (str); /* it was invalid unichar string */ if (p == NULL) { g_string_free (res, TRUE); return NULL; } return res; }
/**
 * get_next:
 * @p: the position in the string where reading starts.
 * @o: receives the position of the character that was stored in @out.
 * @out: receives the stripped version of the character that was read.
 * @separators: if %TRUE, keep reading until the stripped character is
 * alphanumeric, the end of the string is reached, or decoding fails;
 * if %FALSE, read exactly one character.
 *
 * Reads the next character that is relevant for the search, stores its
 * stripped form in @out and its location in @o.
 *
 * Returns: the position right after the character that was read, as
 * given by e_util_unicode_get_utf8() (which may be %NULL).
 **/
static const gchar *
get_next (const gchar *p, const gchar **o, gunichar *out, gboolean separators)
{
    gunichar raw;

    for (;;) {
        *o = p;
        p = e_util_unicode_get_utf8 (p, &raw);
        *out = stripped_char (raw);

        /* Without separator skipping a single read is all we do. */
        if (!separators)
            break;

        /* Stop on decoding failure, end of string, or the first
         * alphanumeric character. */
        if (p == NULL || raw == 0 || g_unichar_isalnum (*out))
            break;
    }

    return p;
}
/** * hildon_helper_utf8_strstrcasedecomp_needle_stripped: * @haystack: a haystack where to search * @nuni: a needle to search for, already stripped with hildon_helper_strip_string() * * Heavily modified version of e_util_utf8_strstrcasedecomp(). As its * original version, it finds the first occurrence of @nuni in * @haystack. However, instead of stripping @nuni, it expect it to be * already stripped. See hildon_helper_strip_string(). * * This is done for performance reasons, since this search is done * several times for the same string @nuni, it is undesired to strip * it more than once. * * Also, the search is done as a prefix search, starting in the first * alphanumeric character after any non-alphanumeric one. Searching * for "aba" in "Abasto" will match, searching in "Moraba" will not, * and searching in "A tool (abacus)" will do. * * Returns: the first instance of @nuni in @haystack * * Since: 2.2.18 **/ const gchar * hildon_helper_utf8_strstrcasedecomp_needle_stripped (const gchar *haystack, const gunichar *nuni) { gunichar unival; gint nlen = 0; const gchar *o, *p; gunichar sc; if (haystack == NULL) return NULL; if (nuni == NULL) return NULL; if (strlen (haystack) == 0) return NULL; while (*(nuni + nlen) != 0) nlen++; if (nlen < 1) return haystack; for (p = get_next (haystack, &o, &sc, g_unichar_isalnum (nuni[0])); p && sc; p = get_next (p, &o, &sc, g_unichar_isalnum (nuni[0]))) { if (sc) { /* We have valid stripped gchar */ if (sc == nuni[0]) { const gchar *q = p; gint npos = 1; while (npos < nlen) { q = e_util_unicode_get_utf8 (q, &unival); if (!q || !unival) return NULL; sc = stripped_char (unival); if ((!sc) || (sc != nuni[npos])) break; npos++; } if (npos == nlen) { return o; } } } while (p) { sc = g_utf8_get_char (p); if (!g_unichar_isalnum (sc)) break; p = g_utf8_next_char (p); } } return NULL; }
/* Checks whether the words of the list starting at word (each element
 * holding a GString) can be found in s1.
 *
 * Every position of s1 is tried as the start of the current word. When
 * the whole word has been read there, the remaining words are searched
 * recursively, beginning one character after that start position.
 *
 * Returns FALSE for a NULL s1 or a list element without data, TRUE for
 * a NULL list (the previous word was the last one) and otherwise
 * whether the words were found. */
static gboolean
try_contains_word (const gchar *s1, GSList *word)
{
    const gchar *start;
    const gchar *cursor;
    gunichar     hay_ch;
    gunichar     lead_ch;
    GString     *current;

    if (s1 == NULL)
        return FALSE;

    /* previous was last word */
    if (word == NULL)
        return TRUE;

    /* illegal structure */
    if (word->data == NULL)
        return FALSE;

    current = word->data;
    lead_ch = g_utf8_get_char (current->str);

    start = s1;
    cursor = e_util_unicode_get_utf8 (start, &hay_ch);
    while (cursor != NULL && hay_ch != 0) {
        if (hay_ch == lead_ch) {
            gunichar     word_ch;
            const gchar *hay_pos = cursor;
            const gchar *word_pos = e_util_unicode_get_utf8 (current->str, &word_ch);

            /* Walk both strings in step until either ends, a decoding
             * error shows up, or the characters differ. */
            while (hay_pos != NULL && word_pos != NULL && hay_ch != 0 && word_ch != 0) {
                hay_pos = e_util_unicode_get_utf8 (hay_pos, &hay_ch);
                if (hay_pos == NULL)
                    break;

                word_pos = e_util_unicode_get_utf8 (word_pos, &word_ch);
                if (word_pos == NULL)
                    break;

                if (hay_ch != word_ch)
                    break;
            }

            /* we read whole word and no illegal character has been found */
            if (word_ch == 0 && word_pos != NULL && hay_pos != NULL) {
                if (word->next == NULL)
                    return TRUE;

                if (try_contains_word (e_util_unicode_get_utf8 (start, &hay_ch), word->next))
                    return TRUE;
            }
        }

        start = cursor;
        cursor = e_util_unicode_get_utf8 (cursor, &hay_ch);
    }

    return FALSE;
}
/* first space between words is treated as wildcard character; * we are looking for s2 in s1, so s2 will be breaked into words */ static gboolean contains_helper (const gchar *s1, const gchar *s2, const gchar *region) { gchar *s1uni; gchar *s2uni; GSList *words; gchar *next; gboolean have_nonspace; gboolean have_space; GString *last_word, *w; gboolean res; gunichar unich; glong len1, len2; if (!s2) return FALSE; /* the initial word contains an empty string for sure */ if (!*s2) return TRUE; s1uni = e_util_utf8_normalize (s1); if (s1uni == NULL) return FALSE; s2uni = e_util_utf8_normalize (s2); if (s2uni == NULL) { g_free (s1uni); return FALSE; } len1 = g_utf8_strlen (s1uni, -1); len2 = g_utf8_strlen (s2uni, -1); if (len1 == 0 || len2 == 0) { g_free (s1uni); g_free (s2uni); /* both are empty strings */ if (len1 == len2) return TRUE; return FALSE; } /* breaking s2 into words */ words = NULL; have_nonspace = FALSE; have_space = FALSE; last_word = NULL; w = g_string_new (""); for (next = e_util_unicode_get_utf8 (s2uni, &unich); next && unich; next = e_util_unicode_get_utf8 (next, &unich)) { if (unich == ' ') { if (have_nonspace && !have_space) { /* treat only first space after nonspace character as wildcard, * so we will start new word here */ have_space = TRUE; words = g_slist_append (words, w); last_word = w; w = g_string_new (""); } else { g_string_append_unichar (w, unich); } } else { have_nonspace = TRUE; have_space = FALSE; g_string_append_unichar (w, unich); } } if (have_space) { /* there was one or more spaces at the end of string, * concat actual word with that last one */ g_string_append_len (last_word, w->str, w->len); g_string_free (w, TRUE); } else { /* append actual word into words list */ words = g_slist_append (words, w); } res = try_contains_word (s1uni, words); g_free (s1uni); g_free (s2uni); g_slist_foreach (words, contains_helper_free_word, NULL); g_slist_free (words); return res; }