/*
 * Compare two UTF-8 strings.
 *
 * A NULL string sorts before any non-NULL string and two NULL strings are
 * equal.  Otherwise both strings are normalized with G_NORMALIZE_DEFAULT and
 * compared with strcasecmp().
 *
 * Returns: 0 when equal, a negative value when s1 sorts first, a positive
 * value when s2 sorts first.
 */
gint hb_string_utf8_compare(gchar *s1, gchar *s2)
{
	gchar *ns1, *ns2;
	gint retval;

	if (s1 == NULL || s2 == NULL)
	{
		if (s1 == NULL && s2 == NULL)
			return 0;
		return (s1 == NULL) ? -1 : 1;
	}

	/* An earlier g_utf8_collate() based comparison was replaced here,
	 * see #1325969. */
	ns1 = g_utf8_normalize(s1, -1, G_NORMALIZE_DEFAULT);
	ns2 = g_utf8_normalize(s2, -1, G_NORMALIZE_DEFAULT);

	/* g_utf8_normalize() yields NULL for invalid UTF-8; compare the raw
	 * bytes in that case instead of handing NULL to strcasecmp(). */
	retval = strcasecmp(ns1 != NULL ? ns1 : s1, ns2 != NULL ? ns2 : s2);

	g_free(ns2);
	g_free(ns1);

	return retval;
}
/*
 * Return the smallest edit_dist() between the NFD form of the first `len`
 * bytes of `word` and the NFD form of each entry of the NULL-terminated
 * array `suggs`.  The starting value is the character count of the
 * normalized word, so that is what comes back when `suggs` is NULL or no
 * suggestion scores lower.
 */
static int best_distance(const char*const*const suggs, const char *const word, size_t len)
{
	char *word_nfd = g_utf8_normalize (word, len, G_NORMALIZE_NFD);
	int shortest = g_utf8_strlen (word_nfd, -1);

	if (suggs != NULL) {
		const char *const *cursor = suggs;

		while (*cursor != NULL) {
			char *sugg_nfd = g_utf8_normalize (*cursor, -1, G_NORMALIZE_NFD);
			int d = edit_dist (word_nfd, sugg_nfd);

			g_free (sugg_nfd);
			if (d < shortest)
				shortest = d;
			++cursor;
		}
	}

	g_free (word_nfd);
	return shortest;
}
/* Fill in item->path and item->name according to the scheme of item->uri.
 *
 * Returns a newly allocated, normalized and case-folded string to search
 * in, or NULL when no such string could be produced.
 */
static gchar *
fileitem_setup (FileItem *item)
{
	gchar *uri_scheme = g_uri_parse_scheme (item->uri);
	gchar *normalized = NULL;
	gchar *result = NULL;

	if (g_strcmp0 (uri_scheme, "file") != 0)
	{
		/* Non-local location: go through GFile for display strings. */
		GFile *location = g_file_new_for_uri (item->uri);
		gchar *display;

		item->path = gedit_utils_location_get_dirname_for_display (location);
		item->name = gedit_utils_basename_for_display (location);
		display = g_file_get_parse_name (location);
		g_object_unref (location);

		normalized = g_utf8_normalize (display, -1, G_NORMALIZE_ALL);
		g_free (display);
	}
	else
	{
		gchar *local = g_filename_from_uri ((const gchar *)item->uri, NULL, NULL);

		if (local != NULL)
		{
			gchar *dir;
			gchar *base;

			dir = g_path_get_dirname (local);
			item->path = g_filename_to_utf8 (dir, -1, NULL, NULL, NULL);
			g_free (dir);

			base = g_path_get_basename (local);
			item->name = g_filename_to_utf8 (base, -1, NULL, NULL, NULL);
			g_free (base);

			normalized = g_utf8_normalize (local, -1, G_NORMALIZE_ALL);
			g_free (local);
		}
	}

	if (normalized != NULL)
	{
		result = g_utf8_casefold (normalized, -1);
		g_free (normalized);
	}

	g_free (uri_scheme);

	return result;
}
static char * str_utf8_casefold_normalize (const char *text) { GString *fixed; char *tmp, *fold; char *result; const char *start; const char *end; fixed = g_string_sized_new (4); start = text; while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') { if (start != end) { fold = g_utf8_casefold (start, end - start); tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); g_string_append (fixed, tmp); g_free (tmp); g_free (fold); } g_string_append_c (fixed, end[0]); start = end + 1; } if (start == text) { fold = g_utf8_casefold (text, -1); result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); g_free (fold); g_string_free (fixed, TRUE); } else { if (start[0] != '\0' && start != end) { fold = g_utf8_casefold (start, end - start); tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); g_string_append (fixed, tmp); g_free (tmp); g_free (fold); } result = g_string_free (fixed, FALSE); } return result; }
/*
 * Refresh demo->selected_region and demo->selected_text from the current
 * selection (demo->doc_area, demo->style) on demo->page.
 *
 * The selected text is stored NFKC-normalized.  The copy button is made
 * sensitive only when normalized text is actually available.
 */
static void
pgd_selections_update_selected_text (PgdSelectionsDemo *demo)
{
	gchar *text;

	if (demo->selected_region)
		cairo_region_destroy (demo->selected_region);
	demo->selected_region = poppler_page_get_selected_region (demo->page,
								   1.0,
								   demo->style,
								   &demo->doc_area);

	/* g_free() accepts NULL, no guard needed. */
	g_free (demo->selected_text);
	demo->selected_text = NULL;

	text = poppler_page_get_selected_text (demo->page,
					       demo->style,
					       &demo->doc_area);
	if (text == NULL)
		return;

	/* g_utf8_normalize() returns NULL for invalid UTF-8. */
	demo->selected_text = g_utf8_normalize (text, -1, G_NORMALIZE_NFKC);
	g_free (text);

	if (demo->selected_text != NULL)
		gtk_widget_set_sensitive (demo->copy_button, TRUE);
}
/*
 *  call-seq:
 *    utf8_normalize(string, form)
 *
 *  Returns the normalized form of the string. See http://www.unicode.org/reports/tr15/tr15-29.html for more
 *  information about normalization.
 *
 *  <i>form</i> can be one of the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
 *
 *  Raises ArgumentError when <i>form</i> is not one of these symbols or when
 *  the string cannot be normalized because it is not valid UTF-8.
 *
 *    decomposed = [101, 769].pack('U*')
 *    composed = Glib.utf8_normalize(decomposed, :kc)
 *    composed.unpack('U*') #=> [233]
 */
static VALUE utf8_normalize(VALUE self, VALUE string, VALUE form)
{
  VALUE result;
  gchar *temp;
  GNormalizeMode mode;

  Check_Type(string, T_STRING);
  Check_Type(form, T_SYMBOL);

  if (ID2SYM(rb_intern("d")) == form) {
    mode = G_NORMALIZE_NFD;
  } else if (ID2SYM(rb_intern("c")) == form) {
    mode = G_NORMALIZE_NFC;
  } else if (ID2SYM(rb_intern("kd")) == form) {
    mode = G_NORMALIZE_NFKD;
  } else if (ID2SYM(rb_intern("kc")) == form) {
    mode = G_NORMALIZE_NFKC;
  } else {
    rb_raise(rb_eArgError, "%s is not a valid normalization form, options are: :d, :kd, :c, or :kc", RSTRING_PTR(rb_inspect(form)));
  }

  temp = g_utf8_normalize(StringValuePtr(string), RSTRING_LEN(string), mode);
  if (temp == NULL) {
    /* g_utf8_normalize() returns NULL when the input is not valid UTF-8. */
    rb_raise(rb_eArgError, "string is not valid UTF-8");
  }

  result = rb_str_new2(temp);
  /* Memory handed out by GLib must be released with g_free(). */
  g_free(temp);

  return result;
}
/*
 * Replace the NUL-terminated UTF-8 text in `buffer` (capacity `size` bytes)
 * with its G_NORMALIZE_DEFAULT_COMPOSE form, truncated to fit.
 *
 * When the text cannot be normalized (g_utf8_normalize() returns NULL for
 * invalid UTF-8) the buffer is left untouched.
 */
static void
utf8_tool_compose (char *buffer, size_t size)
{
    char *composed = g_utf8_normalize (buffer, -1, G_NORMALIZE_DEFAULT_COMPOSE);

    if (composed == NULL)
        return;

    g_strlcpy (buffer, composed, size);
    g_free (composed);
}
CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const { if (!m_name) return CString(); if (!length) return ""; #if USE(ICU_UNICODE) // FIXME: What's the right place to do normalization? // It's a little strange to do it inside the encode function. // Perhaps normalization should be an explicit step done before calling encode. const UChar* source = characters; size_t sourceLength = length; Vector<UChar> normalizedCharacters; UErrorCode err = U_ZERO_ERROR; if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) { // First try using the length of the original string, since normalization to NFC rarely increases length. normalizedCharacters.grow(sourceLength); int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err); if (err == U_BUFFER_OVERFLOW_ERROR) { err = U_ZERO_ERROR; normalizedCharacters.resize(normalizedLength); normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err); } ASSERT(U_SUCCESS(err)); source = normalizedCharacters.data(); sourceLength = normalizedLength; } return newTextCodec(*this)->encode(source, sourceLength, handling); #elif USE(QT4_UNICODE) QString str(reinterpret_cast<const QChar*>(characters), length); str = str.normalized(QString::NormalizationForm_C); return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling); #elif USE(GLIB_UNICODE) GOwnPtr<char> UTF8Source; UTF8Source.set(g_utf16_to_utf8(characters, length, 0, 0, 0)); if (!UTF8Source) { // If conversion to UTF-8 failed, try with the string without normalization return newTextCodec(*this)->encode(characters, length, handling); } GOwnPtr<char> UTF8Normalized; UTF8Normalized.set(g_utf8_normalize(UTF8Source.get(), -1, G_NORMALIZE_NFC)); long UTF16Length; GOwnPtr<UChar> UTF16Normalized; UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 
0)); return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling); #elif OS(WINCE) // normalization will be done by Windows CE API OwnPtr<TextCodec> textCodec = newTextCodec(*this); return textCodec.get() ? textCodec->encode(characters, length, handling) : CString(); #endif }
/*!
  \brief gets called to set the entry background color depending on whether
  the regex check between the entry text and the mask succeeds or not
  \param entry is a pointer to the MaskEntry structure
  */
G_MODULE_EXPORT void mask_entry_set_background (MaskEntry *entry)
{
	static const GdkColor error_color = { 0, 65535, 60000, 60000 };
	/* NULL restores the default base color */
	const GdkColor *base = NULL;

	if (entry->mask)
	{
		gchar *normalized = g_utf8_normalize(gtk_entry_get_text (GTK_ENTRY (entry)),-1,G_NORMALIZE_DEFAULT);
		gchar *folded = g_utf8_casefold(normalized,-1);
		g_free(normalized);

		/* NOTE(review): the folded entry text is passed as the regex
		 * pattern and entry->mask as the string to match, and a match
		 * selects the error color -- confirm this is intended.
		 */
		if (g_regex_match_simple(folded,entry->mask,
					(GRegexCompileFlags)0,(GRegexMatchFlags)0))
			base = &error_color;
		g_free(folded);
	}

	gtk_widget_modify_base (GTK_WIDGET (entry), GTK_STATE_NORMAL, base);
}
/*
 * Record the expansion rate `er` on the renderer and rewrite the layout
 * text so that a zero-width space sits between every pair of remaining
 * characters.  r->expansion.count is set to the number of spaces inserted.
 */
static void
set_text_expansion (AboutRenderer *r, double er)
{
	GString *spaced = g_string_new (NULL);
	const char *cursor;
	char *composed;

	r->expansion.rate = er;
	r->expansion.count = 0;

	/* Normalize to make sure diacriticals are combined.  */
	composed = g_utf8_normalize (pango_layout_get_text (r->layout), -1,
				     G_NORMALIZE_DEFAULT_COMPOSE);

	/* Insert inter-letter spaces we can stretch.  */
	cursor = composed;
	while (*cursor) {
		gunichar ch = g_utf8_get_char (cursor);

		cursor = g_utf8_next_char (cursor);

		/* Zero-width spaces already in the text are dropped. */
		if (ch == UNICODE_ZERO_WIDTH_SPACE_C)
			continue;

		if (spaced->len > 0) {
			g_string_append_unichar (spaced, UNICODE_ZERO_WIDTH_SPACE_C);
			r->expansion.count++;
		}
		g_string_append_unichar (spaced, ch);
	}

	g_free (composed);
	pango_layout_set_text (r->layout, spaced->str, -1);
	g_string_free (spaced, TRUE);
}
/*
 * Match function for a GtkEntryCompletion: returns TRUE when `key` occurs
 * anywhere in the normalized, case-folded text-column value of the row at
 * `iter`.  Rows whose text column is not a string never match.
 */
static gboolean _completion_match_func(GtkEntryCompletion *completion, const gchar *key, GtkTreeIter *iter,
                                       gpointer user_data)
{
  GtkTreeModel *model = gtk_entry_completion_get_model(completion);
  const int column = gtk_entry_completion_get_text_column(completion);
  gboolean matched = FALSE;
  char *tag = NULL;
  char *normalized;
  char *folded = NULL;

  if(gtk_tree_model_get_column_type(model, column) != G_TYPE_STRING) return FALSE;

  gtk_tree_model_get(model, iter, column, &tag, -1);
  if(tag == NULL) return FALSE;

  normalized = g_utf8_normalize(tag, -1, G_NORMALIZE_ALL);
  if(normalized != NULL) folded = g_utf8_casefold(normalized, -1);
  if(folded != NULL) matched = (g_strstr_len(folded, -1, key) != NULL);

  g_free(folded);
  g_free(normalized);
  g_free(tag);

  return matched;
}
/*
 * Encode the name in *strpp in place and replace it by its NFD form.
 *
 * Encode path names to appease Finder:
 *   0. leading . is treated as hidden file, encode as _
 *   1. slash is Unix path separator, encode as :
 *   2. \r and \n are problematic, encode as space
 * Then normalize the string to cope with tricky things like umlaut.
 *
 * On successful normalization the old string is freed and *strpp points to
 * the new one; otherwise *strpp keeps the (already encoded) old string.
 * A NULL *strpp is left alone.
 */
static void
ipoddisk_encode_name (gchar **strpp)
{
	gchar *old = *strpp;
	gchar *nstr; /* normalized utf-8 string */
	gchar *p;

	if (old == NULL)
		return;

	if (old[0] == '.')
		old[0] = '_';

	/* Single pass up to the terminator instead of calling strlen() on
	 * every iteration. */
	for (p = old; *p != '\0'; p++) {
		if (*p == '/')
			*p = ':';
		else if (*p == '\r' || *p == '\n')
			*p = ' ';
	}

	nstr = g_utf8_normalize(old, -1, G_NORMALIZE_NFD);
	if (nstr) {
		g_free(old);
		*strpp = nstr;
	}
}
/* Convert a string in UTF-8 to legacy encoding, escaping those characters that
 * fail to convert.
 *
 * Returns the result of iconv_string_escaped(), or NULL when either argument
 * is NULL or the input is not valid UTF-8. */
char *str_utf8_to_escaped_legacy(const char *string, const char *legacy_encoding)
{
    if(string == NULL || legacy_encoding == NULL)
    {
        return NULL;
    }

    /* Convert the (possibly) decomposed UTF-8 string to composed form (eg,
     * Ä is converted to a single precomposed character instead of a base
     * character with a combining accent). This is required for the following
     * conversion to legacy (Windows-1252) encoding. */
    char *utf8_composed_string = g_utf8_normalize(string, -1, G_NORMALIZE_DEFAULT_COMPOSE);
    if(utf8_composed_string == NULL)
    {
        WARNING("input string not valid UTF-8");
        return NULL;
    }

    char *legacy_string = iconv_string_escaped(utf8_composed_string, -1, "UTF-8", legacy_encoding);

    /* The buffer comes from GLib, so it has to go back through g_free(). */
    g_free(utf8_composed_string);

    return legacy_string;
}
/** * Case insensitive substring search for a completion match. * * Based on the default matching function in GtkEntryCompletion. * * This function is called once for each iter in the GtkEntryCompletion's * list of completion entries (model). * * @param completion Completion object to apply this function on * @param key Complete string from the GtkEntry. * @param iter Item in list of autocomplete database to compare key against. * @param user_data Unused. */ static gboolean on_match_func (GtkEntryCompletion *completion, const gchar *key, GtkTreeIter *iter, gpointer user_data) { gchar *item = NULL; gchar *normalized_string; gchar *case_normalized_string; gboolean ret = FALSE; GtkTreeModel *model = gtk_entry_completion_get_model (completion); GtkEditable *e = (GtkEditable*) gtk_entry_completion_get_entry(completion); gint cur_pos = gtk_editable_get_position(e); /* returns 1..* */ gint p = cur_pos; gint var_start; gboolean var_present = FALSE; for (p = cur_pos; p >= 0; p--) { gchar *ss = gtk_editable_get_chars(e, p, cur_pos); if (strncmp(ss, "$(", 2) == 0) { var_start = p+2; var_present = TRUE; g_free(ss); break; } g_free(ss); } if (var_present) { gchar *varname = gtk_editable_get_chars(e, var_start, cur_pos); gtk_tree_model_get (model, iter, COMPL_VARNAME, &item, -1); if (item != NULL) { // Do utf8-safe case insensitive string compare. // Shamelessly stolen from GtkEntryCompletion. normalized_string = g_utf8_normalize (item, -1, G_NORMALIZE_ALL); if (normalized_string != NULL) { case_normalized_string = g_utf8_casefold (normalized_string, -1); if (!g_ascii_strncasecmp(varname, case_normalized_string, strlen (varname))) ret = TRUE; g_free (case_normalized_string); } g_free (normalized_string); } g_free (varname); } g_free (item); return ret; }
/*
 * Return a newly allocated copy of `name` that has been normalized with
 * G_NORMALIZE_DEFAULT and then case-folded.
 */
static inline gchar *
normalize_name (const gchar *name)
{
  gchar *folded;
  gchar *normalized;

  normalized = g_utf8_normalize (name, -1, G_NORMALIZE_DEFAULT);
  folded = g_utf8_casefold (normalized, -1);
  g_free (normalized);

  return folded;
}
/*
 * Produce a newly allocated, processed copy of `str`: the text is
 * normalized (G_NORMALIZE_ALL), combining marks are dropped, and characters
 * are lowercased unless `str` is a range field.  With `xapian_esc` set,
 * handle_esc_maybe() gets the first chance at every character.
 *
 * With `query_esc` set and `str` being a msg-id field, the result of
 * mu_str_process_msgid() is returned instead.
 *
 * Returns NULL when `str` cannot be normalized even after
 * mu_str_utf8ify().
 */
static char*
process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
{
	GString *gstr;
	char *norm, *cur;
	gboolean is_field, is_range_field;

	norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
	if (G_UNLIKELY(!norm)) {  /* not valid utf8? */
		char *u8;
		u8 = mu_str_utf8ify (str);
		norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
		g_free (u8);
	}

	if (!norm)
		return NULL;

	/* msg-id needs some special care in queries */
	if (query_esc && is_msgid_field (str)) {
		/* the normalized copy is not used on this path; release it
		 * rather than leaking it */
		g_free (norm);
		return mu_str_process_msgid (str, TRUE);
	}

	check_for_field (str, &is_field, &is_range_field);
	gstr = g_string_sized_new (strlen (norm));

	for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {

		gunichar uc;
		uc = g_utf8_get_char (cur);
		if (xapian_esc)
			if (handle_esc_maybe (gstr, &cur, uc, query_esc,
					      is_range_field))
				continue;

		/* drop combining marks */
		if (g_unichar_ismark(uc))
			continue;

		if (!is_range_field)
			uc = g_unichar_tolower (uc);

		g_string_append_unichar (gstr, uc);
	}

	g_free (norm);
	return g_string_free (gstr, FALSE);
}
/*
 * Build the needle used for searching: the G_NORMALIZE_ALL form of
 * `needle`, case-folded first unless `case_sen` is non-zero.
 *
 * Returns a newly allocated string, or NULL when `needle` is NULL.
 */
static char *
str_utf8_create_search_needle (const char *needle, int case_sen)
{
    const char *source = needle;
    char *folded = NULL;
    char *normalized;

    if (needle == NULL)
        return NULL;

    if (!case_sen)
    {
        folded = g_utf8_casefold (needle, -1);
        source = folded;
    }

    normalized = g_utf8_normalize (source, -1, G_NORMALIZE_ALL);
    g_free (folded);

    return normalized;
}
/*
 * AtkText get_selection implementation.  Only selection number 0 is
 * supported.  Looks at the first view selection that lies on this page;
 * when it has a non-empty range, *start_pos / *end_pos receive its bounds
 * and the NFKC-normalized selected text is returned.
 *
 * In every other case NULL is returned and both positions are -1.
 */
static gchar *
ev_page_accessible_get_selection (AtkText *text,
				  gint     selection_num,
				  gint    *start_pos,
				  gint    *end_pos)
{
	EvPageAccessible *self = EV_PAGE_ACCESSIBLE (text);
	EvView *view = ev_page_accessible_get_view (self);
	EvViewSelection *selection;
	EvPage *page;
	gchar *raw_text;
	gchar *normalized_text = NULL;
	gint start, end;
	GList *node;

	*start_pos = -1;
	*end_pos = -1;

	if (selection_num != 0)
		return NULL;

	if (!EV_IS_SELECTION (view->document) || !view->selection_info.selections)
		return NULL;

	/* Locate the first selection that belongs to this page. */
	node = view->selection_info.selections;
	while (node != NULL &&
	       ((EvViewSelection *) node->data)->page != self->priv->page)
		node = node->next;

	if (node == NULL)
		return NULL;

	selection = (EvViewSelection *) node->data;
	if (!get_selection_bounds (view, selection, &start, &end) || start == end)
		return NULL;

	page = ev_document_get_page (view->document, selection->page);

	ev_document_doc_mutex_lock ();
	raw_text = ev_selection_get_selected_text (EV_SELECTION (view->document),
						   page,
						   selection->style,
						   &(selection->rect));
	ev_document_doc_mutex_unlock ();

	g_object_unref (page);

	*start_pos = start;
	*end_pos = end;

	if (raw_text != NULL) {
		normalized_text = g_utf8_normalize (raw_text, -1, G_NORMALIZE_NFKC);
		g_free (raw_text);
	}

	return normalized_text;
}
/*
 * Return a newly allocated copy of `str`, normalized with
 * G_NORMALIZE_ALL_COMPOSE and then case-folded.
 */
static char *prepare(const char *str)
{
    char *composed;
    char *folded;

    composed = g_utf8_normalize(str, -1, G_NORMALIZE_ALL_COMPOSE);
    folded = g_utf8_casefold(composed, -1);
    g_free(composed);

    return folded;
}
/*
 * Levenshtein comparison of two strings after G_NORMALIZE_ALL_COMPOSE
 * normalization.
 *
 * Returns the value of levenshtein_strcmp() on the normalized strings, or
 * 100 when either string is NULL or not valid UTF-8.
 */
gsize levenshtein_safe_strcmp(const gchar * s, const gchar * t)
{
    gsize rc = 100;

    /* g_utf8_validate() must not be handed a NULL string. */
    if(s == NULL || t == NULL)
        return rc;

    if(g_utf8_validate(s,-1,NULL) == FALSE ||
            g_utf8_validate(t,-1,NULL) == FALSE)
        return rc;

    gchar * s_norm = g_utf8_normalize(s, -1, G_NORMALIZE_ALL_COMPOSE);
    gchar * t_norm = g_utf8_normalize(t, -1, G_NORMALIZE_ALL_COMPOSE);

    rc = levenshtein_strcmp(s_norm, t_norm);

    g_free(s_norm);
    g_free(t_norm);

    return rc;
}
/*
 * Advance `iter` until `count` characters have been accounted for or the
 * end is reached.
 *
 * Characters are stepped over without being counted when `skip_nontext` is
 * set and the character is GTK_TEXT_UNKNOWN_CHAR, or when `skip_invisible`
 * is set and char_is_invisible() reports it.  With `skip_decomp` set, a
 * counted character weighs as many units as its case-folded NFD form has
 * characters.
 */
static void
forward_chars_with_skipping (GtkTextIter *iter,
			     gint         count,
			     gboolean     skip_invisible,
			     gboolean     skip_nontext,
			     gboolean     skip_decomp)
{
	gint remaining;

	g_return_if_fail (count >= 0);

	remaining = count;

	/* Stopping at the end of the buffer is a minimal workaround to avoid
	 * the infinite loop of bug #168247. It doesn't fix the problem, just
	 * the symptom...
	 */
	while (remaining > 0 && !gtk_text_iter_is_end (iter))
	{
		/* FIXME: char_is_invisible() gets list of tags for each char there,
		   and checks every tag. It doesn't sound like a good idea. */
		gboolean ignored =
			(skip_nontext &&
			 gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR) ||
			(skip_invisible && char_is_invisible (iter));
		glong weight = 1;

		if (!ignored && skip_decomp)
		{
			/* Account for the extra offsets coming from canonical
			   decompositions of characters (e.g. accented
			   characters) which g_utf8_normalize() performs. */
			gchar utf8[6];
			gint utf8_len;
			gchar *folded;
			gchar *decomposed;

			utf8_len = g_unichar_to_utf8 (gtk_text_iter_get_char (iter), utf8);
			folded = g_utf8_casefold (utf8, utf8_len);
			decomposed = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
			weight = g_utf8_strlen (decomposed, -1);
			g_free (decomposed);
			g_free (folded);
		}

		gtk_text_iter_forward_char (iter);

		if (!ignored)
			remaining -= weight;
	}
}
/*
 * Create a WTEvent of the given type from NFC-normalized copies of `path`
 * and `new_path` (either may be NULL) and append it to status->event_q
 * while holding status->q_lock.
 */
static void
add_event_to_queue (WTStatus *status,
                    int type, const char *path, const char *new_path)
{
    char *nfc_path = NULL, *nfc_new_path = NULL;
    const char *name;
    WTEvent *event;

    if (path)
        nfc_path = g_utf8_normalize (path, -1, G_NORMALIZE_NFC);
    if (new_path)
        nfc_new_path = g_utf8_normalize (new_path, -1, G_NORMALIZE_NFC);

    event = wt_event_new (type, nfc_path, nfc_new_path);

    g_free (nfc_path);
    g_free (nfc_new_path);

    switch (type) {
    case WT_EVENT_CREATE_OR_UPDATE:
        name = "create/update";
        break;
    case WT_EVENT_DELETE:
        name = "delete";
        break;
    case WT_EVENT_RENAME:
        name = "rename";
        break;
    case WT_EVENT_OVERFLOW:
        name = "overflow";
        break;
    case WT_EVENT_ATTRIB:
        name = "attribute change";
        break;
    default:
        name = "unknown";
    }

    /* Both paths are optional; never hand NULL to a %s conversion. */
    seaf_debug ("Adding event: %s, %s %s\n",
                name, path ? path : "", new_path ? new_path : "");

    pthread_mutex_lock (&status->q_lock);
    g_queue_push_tail (status->event_q, event);
    pthread_mutex_unlock (&status->q_lock);
}
static gchar * g_utf8_strrcasestr (const gchar *haystack, const gchar *needle) { gsize needle_len; gsize haystack_len; gchar *ret = NULL; gchar *p; gchar *casefold; gchar *caseless_haystack; gint i; g_return_val_if_fail (haystack != NULL, NULL); g_return_val_if_fail (needle != NULL, NULL); casefold = g_utf8_casefold (haystack, -1); caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); g_free (casefold); needle_len = g_utf8_strlen (needle, -1); haystack_len = g_utf8_strlen (caseless_haystack, -1); if (needle_len == 0) { ret = (gchar *)haystack; goto finally_1; } if (haystack_len < needle_len) { ret = NULL; goto finally_1; } haystack_len = strlen (caseless_haystack); needle_len = strlen (needle); p = (gchar *)caseless_haystack + haystack_len - needle_len; i = haystack_len - needle_len; while (p >= caseless_haystack) { if (strncasecmp (p, needle, needle_len) == 0) { ret = g_utf8_offset_to_pointer (haystack, i); goto finally_1; } p = g_utf8_prev_char (p); i--; } finally_1: g_free (caseless_haystack); return ret; }
/*
 * Create, schedule and start an FSEvents stream watching `worktree` (passed
 * to FSEvents in NFD form) and register it for `repo_id` in the monitor's
 * handle and info tables.  Finally an empty-path create/update event is
 * queued for the repo.
 *
 * Returns the new stream, or NULL when FSEventStreamCreate() fails.
 */
static FSEventStreamRef
add_watch (SeafWTMonitor *monitor, const char* repo_id, const char* worktree)
{
    SeafWTMonitorPriv *priv = monitor->priv;
    const double latency = 0.25; /* unit: second */
    RepoWatchInfo *watch_info;
    FSEventStreamRef stream;
    CFStringRef path_ref;
    CFArrayRef watch_paths;
    char *nfd_path;

    nfd_path = g_utf8_normalize (worktree, -1, G_NORMALIZE_NFD);
    path_ref = CFStringCreateWithCString (kCFAllocatorDefault,
                                          nfd_path, kCFStringEncodingUTF8);
    g_free (nfd_path);

    watch_paths = CFArrayCreate(NULL, (const void **)&path_ref, 1, NULL);

    /* Create the stream, passing in a callback */
    seaf_debug("Use kFSEventStreamCreateFlagWatchRoot\n");
    // kFSEventStreamCreateFlagFileEvents does not work for libraries with name
    // containing accent characters.
    struct FSEventStreamContext ctx = {0, monitor, NULL, NULL, NULL};
    stream = FSEventStreamCreate(kCFAllocatorDefault,
                                 stream_callback,
                                 &ctx,
                                 watch_paths,
                                 kFSEventStreamEventIdSinceNow,
                                 latency,
                                 kFSEventStreamCreateFlagWatchRoot
                                 );

    CFRelease (path_ref);
    CFRelease (watch_paths);

    if (stream == NULL) {
        seaf_warning ("[wt] Failed to create event stream.\n");
        return stream;
    }

    FSEventStreamScheduleWithRunLoop(stream, CFRunLoopGetCurrent(), kCFRunLoopDefaultMode);
    FSEventStreamStart (stream);
    /* FSEventStreamShow (stream); */
    seaf_debug ("[wt mon] Add repo %s watch success: %s.\n", repo_id, worktree);

    /* Both tables are updated under the same lock. */
    pthread_mutex_lock (&priv->hash_lock);

    g_hash_table_insert (priv->handle_hash,
                         g_strdup(repo_id), (gpointer)(long)stream);

    watch_info = create_repo_watch_info (repo_id, worktree);
    g_hash_table_insert (priv->info_hash, (gpointer)(long)stream, watch_info);

    pthread_mutex_unlock (&priv->hash_lock);

    /* An empty path indicates repo-mgr to scan the whole worktree. */
    add_event_to_queue (watch_info->status, WT_EVENT_CREATE_OR_UPDATE, "", NULL);

    return stream;
}
static const gchar * utf8_strrcasestr (const gchar *haystack, const gchar *needle) { gsize needle_len; gsize haystack_len; const gchar *ret = NULL; gchar *p; gchar *casefold; gchar *caseless_haystack; gint i; g_return_val_if_fail (haystack != NULL, NULL); g_return_val_if_fail (needle != NULL, NULL); casefold = g_utf8_casefold (haystack, -1); caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD); g_free (casefold); needle_len = g_utf8_strlen (needle, -1); haystack_len = g_utf8_strlen (caseless_haystack, -1); if (needle_len == 0) { ret = (gchar *)haystack; goto finally_1; } if (haystack_len < needle_len) { ret = NULL; goto finally_1; } i = haystack_len - needle_len; p = g_utf8_offset_to_pointer (caseless_haystack, i); needle_len = strlen (needle); while (p >= caseless_haystack) { if (exact_prefix_cmp (p, needle, needle_len)) { ret = pointer_from_offset_skipping_decomp (haystack, i); goto finally_1; } p = g_utf8_prev_char (p); i--; } finally_1: g_free (caseless_haystack); return ret; }
/*
 * Allocate and fill the per-search state for `query`.
 *
 * The directory queue starts with the query location, or "/" when the
 * query has none.  The query text is NFD-normalized, lowercased and split
 * on spaces (with '\\' as escape) into data->words.  For each word,
 * data->word_strstr[i] is TRUE unless the word contains '\\', '?' or '*';
 * data->words_and is TRUE only when every word is of that plain kind.
 */
static SearchThreadData *
search_thread_data_new (NemoSearchEngineSimple *engine,
			NemoQuery *query)
{
	SearchThreadData *data = g_new0 (SearchThreadData, 1);
	GFile *start_dir = NULL;
	char *location_uri;
	char *query_text, *nfd_text, *lowered;
	gint word_count, idx;

	data->engine = engine;
	data->directories = g_queue_new ();
	data->visited = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);

	location_uri = nemo_query_get_location (query);
	if (location_uri != NULL) {
		start_dir = g_file_new_for_uri (location_uri);
		g_free (location_uri);
	}
	if (start_dir == NULL) {
		start_dir = g_file_new_for_path ("/");
	}
	g_queue_push_tail (data->directories, start_dir);

	query_text = nemo_query_get_text (query);
	nfd_text = g_utf8_normalize (query_text, -1, G_NORMALIZE_NFD);
	lowered = g_utf8_strdown (nfd_text, -1);
	data->words = strsplit_esc_n (lowered, ' ', '\\', -1, &word_count);
	g_free (query_text);
	g_free (lowered);
	g_free (nfd_text);

	data->word_strstr = g_malloc(sizeof(gboolean)*word_count);
	data->words_and = TRUE;
	for (idx = 0; data->words[idx] != NULL; idx++) {
		const char *ch;
		gboolean plain = TRUE;

		/* A word stops being plain at the first '\\', '?' or '*'. */
		for (ch = data->words[idx]; *ch != 0; ch++) {
			if (*ch == '\\' || *ch == '?' || *ch == '*') {
				plain = FALSE;
				break;
			}
		}

		data->word_strstr[idx] = plain;
		if (!plain)
			data->words_and = FALSE;
	}

	data->mime_types = nemo_query_get_mime_types (query);

	data->cancellable = g_cancellable_new ();

	return data;
}
static const gchar * g_utf8_strcasestr (const gchar *haystack, const gchar *needle) { gsize needle_len; gsize haystack_len; const gchar *ret = NULL; gchar *p; gchar *casefold; gchar *caseless_haystack; gint i; g_return_val_if_fail (haystack != NULL, NULL); g_return_val_if_fail (needle != NULL, NULL); casefold = g_utf8_casefold (haystack, -1); caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD); g_free (casefold); needle_len = g_utf8_strlen (needle, -1); haystack_len = g_utf8_strlen (caseless_haystack, -1); if (needle_len == 0) { ret = (gchar *)haystack; goto finally_1; } if (haystack_len < needle_len) { ret = NULL; goto finally_1; } p = (gchar*)caseless_haystack; needle_len = strlen (needle); i = 0; while (*p) { if ((strncmp (p, needle, needle_len) == 0)) { ret = pointer_from_offset_skipping_decomp (haystack, i); goto finally_1; } p = g_utf8_next_char (p); i++; } finally_1: g_free (caseless_haystack); return ret; }
/*
 * Return a newly allocated version of `string` that has been passed
 * through albumart_strip_invalid_entities(), normalized with
 * G_NORMALIZE_ALL and lowercased.
 */
gchar *cleaned_string(gchar *string)
{
	gchar *without_entities = albumart_strip_invalid_entities(string);
	gchar *decomposed = g_utf8_normalize(without_entities, -1, G_NORMALIZE_ALL);
	gchar *result;

	g_free(without_entities);

	result = g_utf8_strdown(decomposed, -1);
	g_free(decomposed);

	return result;
}
/*
 * Remove `text` from the quickfill `qf`.  The text is NFC-normalized and
 * handed to gnc_quickfill_remove_recursive() starting at depth 0 with the
 * given `sort`.  Nothing happens when `qf` or `text` is NULL.
 */
void
gnc_quickfill_remove (QuickFill *qf, const gchar *text, QuickFillSort sort)
{
    gchar *nfc_text;

    if (qf == NULL || text == NULL)
        return;

    nfc_text = g_utf8_normalize (text, -1, G_NORMALIZE_NFC);
    gnc_quickfill_remove_recursive (qf, nfc_text, 0, sort);
    g_free (nfc_text);
}
static char * str_utf8_normalize (const char *text) { GString *fixed = g_string_new (""); char *tmp; char *result; const char *start; const char *end; start = text; while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') { if (start != end) { tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL); g_string_append (fixed, tmp); g_free (tmp); } g_string_append_c (fixed, end[0]); start = end + 1; } if (start == text) { result = g_utf8_normalize (text, -1, G_NORMALIZE_ALL); } else { if (start[0] != '\0' && start != end) { tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL); g_string_append (fixed, tmp); g_free (tmp); } result = g_strdup (fixed->str); } g_string_free (fixed, TRUE); return result; }