gchar* mu_str_xapian_fixup_terms (const gchar *term) { gboolean is_field, is_range_field; const gchar *cond, *pfx, *sfx; gchar *retval; fixup_handler_t fixup; g_return_val_if_fail (term, NULL); if (strlen (term) == 0) return g_strdup (term); check_for_field (term, &is_field, &is_range_field); if (!is_field || !is_range_field) return g_strdup (term); if (!split_term (term, &pfx, &cond, &sfx)) return g_strdup (term); retval = NULL; fixup = find_fixup (cond); if (fixup) retval = fixup (pfx, cond, sfx); if (!retval) retval = g_strdup (term); /* At this point retval should contain the result */ g_free ((gchar *)pfx); g_free ((gchar *)sfx); g_free ((gchar *)cond); return retval; }
/*
 * Xapian gives special meaning to characters such as '@', '-', ':' and
 * '.'. This function is a pragmatic hack that rewrites a search term in
 * place so that the common cases "do what I mean".
 *
 * It expects a single search term, not a complete query.
 *
 * Rewriting rules, applied byte by byte:
 *  - '.' becomes '_', except that a ".." pair is left intact when the
 *    term is a range field;
 *  - the first ':' is kept when check_for_field() reports the term as a
 *    field; every other ':' becomes '_';
 *  - '(', ')', '\'' and '*' (the wildcard) are left alone;
 *  - any other ASCII byte that is not alphanumeric becomes '_';
 *  - bytes >= 0x80 are not touched here.
 *
 * The result is whatever mu_str_normalize_in_place_try (term, TRUE,
 * strchunk) returns. NOTE(review): @esc_space is accepted but never
 * read in this function body; spaces are always replaced by the
 * "other ASCII" rule above.
 *
 * Returns NULL when @term is NULL (via g_return_val_if_fail).
 */
char*
mu_str_xapian_escape_in_place_try (char *term, gboolean esc_space,
				   GStringChunk *strchunk)
{
	const char replacement = '_';
	gboolean is_field, is_range_field;
	unsigned char *p;
	unsigned ncolons;

	g_return_val_if_fail (term, NULL);

	check_for_field (term, &is_field, &is_range_field);

	ncolons = 0;
	for (p = (unsigned char*)term; *p != '\0'; ++p) {

		const unsigned char c = *p;

		if (c == '.') {
			/* keep a '..' pair for range fields: step over
			 * the first dot here, the loop increment then
			 * steps over the second one */
			if (is_range_field && p[1] == '.')
				p += 1;
			else
				*p = replacement;

		} else if (c == ':') {
			/* only the *first* colon of a term carrying a
			 * registered prefix survives; all others are
			 * replaced */
			if (ncolons != 0 || !is_field)
				*p = replacement;
			++ncolons;

		} else if (c == '(' || c == ')' || c == '\'' || c == '*') {
			/* parentheses, quote and wildcard pass through */

		} else if (c < 0x80 && !isalnum (c)) {
			/* every remaining non-alphanumeric ASCII byte */
			*p = replacement;
		}
	}

	/* hand over to the normalizer (second argument TRUE, as before) */
	return mu_str_normalize_in_place_try (term, TRUE, strchunk);
}
/*
 * Build a processed copy of @str: the string is Unicode-normalized
 * (G_NORMALIZE_ALL), characters for which g_unichar_ismark() is true
 * are dropped, and characters are lowercased unless the string is a
 * range field.
 *
 * @str:        the input string
 * @xapian_esc: when TRUE, each character is first offered to
 *              handle_esc_maybe(); if that returns TRUE the character
 *              is considered handled and skipped here
 * @query_esc:  passed on to handle_esc_maybe(); additionally, when TRUE
 *              and is_msgid_field (str) holds, the result comes from
 *              mu_str_process_msgid (str, TRUE) instead
 *
 * If @str cannot be normalized directly it is first passed through
 * mu_str_utf8ify() and normalization is retried.
 *
 * Returns a newly allocated string, or NULL if normalization fails
 * even after the retry. For the message-id case the return value is
 * whatever mu_str_process_msgid() yields.
 */
static char*
process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
{
	GString *gstr;
	char *norm, *cur;
	gboolean is_field, is_range_field;

	norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
	if (G_UNLIKELY(!norm)) {  /* not valid utf8? */
		char *u8;
		u8   = mu_str_utf8ify (str);
		norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
		g_free (u8);
	}

	if (!norm)
		return NULL;

	/* msg-id needs some special care in queries; it is handled by a
	 * dedicated function working on the original string, so the
	 * normalized copy must be released before returning (it used to
	 * leak on this path) */
	if (query_esc && is_msgid_field (str)) {
		g_free (norm);
		return mu_str_process_msgid (str, TRUE);
	}

	check_for_field (str, &is_field, &is_range_field);
	gstr = g_string_sized_new (strlen (norm));

	for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {

		gunichar uc;
		uc = g_utf8_get_char (cur);

		/* handle_esc_maybe() receives &cur, so it may move the
		 * cursor; TRUE means nothing more to do for this char */
		if (xapian_esc &&
		    handle_esc_maybe (gstr, &cur, uc, query_esc,
				      is_range_field))
			continue;

		/* drop mark characters (e.g. the accents split off by
		 * the normalization above) */
		if (g_unichar_ismark (uc))
			continue;

		/* range fields keep their case */
		if (!is_range_field)
			uc = g_unichar_tolower (uc);

		g_string_append_unichar (gstr, uc);
	}

	g_free (norm);

	/* FALSE: free only the GString wrapper, hand the buffer to the
	 * caller */
	return g_string_free (gstr, FALSE);
}