static gboolean gw_spellcheck_is_common_script (const gchar *TEXT) { if (TEXT == NULL) return FALSE; GUnicodeScript script; gunichar c; const gchar *ptr; gboolean is_script; ptr = TEXT; is_script = TRUE; while (*ptr != '\0' && is_script == TRUE) { c = g_utf8_get_char (ptr); script = g_unichar_get_script (c); if (script != G_UNICODE_SCRIPT_COMMON && script != G_UNICODE_SCRIPT_LATIN) { is_script = FALSE; } ptr = g_utf8_next_char (ptr); } return is_script; }
///! ///! @brief This allows searching for fish, and getting bigfish back if the morphology parser doesn't catch it ///! static void _lw_index_index_subkeys_by_type (LwIndex *index, LwIndexTableType type, const gchar *KEY) { //Sanity checks g_return_val_if_fail (index != NULL, NULL); g_return_val_if_fail (KEY != NULL, NULL); //Declarations gchar *key_copy = NULL; gchar *ptr = NULL; const gchar *subkey = NULL; //Initializations key_copy = g_strdup (KEY); if (key_copy == NULL) goto errored; ptr = key_copy; if (*ptr != '\0') ptr = g_utf8_next_char (ptr); //Search for substrings by removing the front characters while (ptr != NULL && *ptr != '\0') { ptr = g_utf8_next_char (ptr); subkey = ptr; GUnicodeScript script = g_unichar_get_script (g_utf8_get_char (subkey)); gboolean has_relevance = (*subkey != '\0' && (g_utf8_strlen (subkey, 9) > 2 || script == G_UNICODE_SCRIPT_HAN)); if (has_relevance && g_hash_table_contains (index->table[type], subkey)) { _lw_index_concat_masterkey_to_subkey (index, type, KEY, subkey); } } //Search for substring by removing the back characters subkey = key_copy; GUnicodeScript script = g_unichar_get_script (g_utf8_get_char (subkey)); while (ptr > subkey) { ptr = g_utf8_prev_char (ptr); *ptr = '\0'; gboolean has_relevance = (*subkey != '\0' && (g_utf8_strlen (subkey, 9) > 2 || script == G_UNICODE_SCRIPT_HAN)); if (has_relevance && g_hash_table_contains (index->table[type], subkey)) { _lw_index_concat_masterkey_to_subkey (index, type, KEY, subkey); } } errored: if (key_copy != NULL) g_free (key_copy); key_copy = NULL; }
/*
 * Measure how often neighbouring characters of a text part switch character
 * class and compare that ratio with chartable_module_ctx->threshold.
 *
 * Byte-wise branch (taken when IS_PART_UTF (part) is true or raw_mode is set):
 * every pair of adjacent bytes is inspected; a pair made of one ASCII letter
 * and one byte with the high bit set counts as a switch, a pair of two ASCII
 * letters or two high-bit bytes only counts towards the total.
 * NOTE(review): IS_PART_UTF is defined elsewhere; it looks surprising that the
 * UTF flag selects the byte-wise branch - confirm against the macro.
 *
 * UTF-8 branch: characters are consumed two at a time. The script of the first
 * character of each pair is tallied; when both characters are alphabetic the
 * pair counts towards the total, and as a switch if their scripts differ.
 * The script with the highest tally is stored in part->script. Invalid UTF-8
 * makes the function return FALSE immediately, leaving part->script untouched.
 *
 * Returns FALSE when no pair was counted, otherwise whether
 * switches / pairs exceeds the configured threshold.
 */
static gboolean
check_part (struct mime_text_part *part, gboolean raw_mode)
{
	guchar *cur, *next;
	gunichar first, second;
	GUnicodeScript first_script, second_script;
	GUnicodeScript dominant = 0;
	guint32 switches = 0, pairs = 0, best = 0, idx;
	guint32 left = part->content->len;
	guint32 histogram[G_UNICODE_SCRIPT_NKO];

	cur = part->content->data;

	if (IS_PART_UTF (part) || raw_mode) {
		for (; left > 1; cur++, left--) {
			gboolean cur_alpha = g_ascii_isalpha (cur[0]);
			gboolean cur_high = (cur[0] & 0x80) != 0;
			gboolean nxt_alpha = g_ascii_isalpha (cur[1]);
			gboolean nxt_high = (cur[1] & 0x80) != 0;

			if ((cur_alpha && nxt_high) || (cur_high && nxt_alpha)) {
				/* Letter next to a high-bit byte: class switch */
				switches++;
				pairs++;
			}
			else if ((cur_high && nxt_high) || (cur_alpha && nxt_alpha)) {
				/* Current and next symbols are of one class */
				pairs++;
			}
		}
	}
	else {
		memset (&histogram, 0, sizeof (histogram));

		while (left > 0) {
			/* First character of the pair */
			first = g_utf8_get_char_validated (cur, left);

			if (first == (gunichar) -2 || first == (gunichar) -1) {
				/* Invalid characters detected, stop processing */
				return FALSE;
			}

			first_script = g_unichar_get_script (first);

			if (first_script < (gint)G_N_ELEMENTS (histogram)) {
				histogram[first_script]++;
			}

			next = g_utf8_next_char (cur);
			left -= next - cur;
			cur = next;

			if (left == 0) {
				/* Odd number of characters: nothing to pair with */
				break;
			}

			/* Second character of the pair */
			second = g_utf8_get_char_validated (cur, left);

			if (second == (gunichar) -2 || second == (gunichar) -1) {
				/* Invalid characters detected, stop processing */
				return FALSE;
			}

			second_script = g_unichar_get_script (second);

			if (g_unichar_isalpha (first) && g_unichar_isalpha (second)) {
				/* Both characters are alphabetic, so their scripts can be compared */
				if (second_script != first_script) {
					switches++;
				}
				pairs++;
			}

			next = g_utf8_next_char (cur);
			left -= next - cur;
			cur = next;
		}

		/* Pick the script that was seen most often in this part */
		for (idx = 0; idx < G_N_ELEMENTS (histogram); idx++) {
			if (histogram[idx] > best) {
				best = histogram[idx];
				dominant = idx;
			}
		}

		part->script = dominant;
	}

	return pairs == 0 ?
			FALSE :
			((double)switches / (double)pairs) > chartable_module_ctx->threshold;
}
/**
 * pango_script_for_unichar:
 * @ch: a Unicode character
 *
 * Determines which #PangoScript a character belongs to, following
 * Unicode Standard Annex #24. @ch is not checked for being a valid
 * Unicode character; passing an invalid character gives an undefined
 * result.
 *
 * As of Pango 1.18, this function simply forwards to
 * g_unichar_get_script() and returns its result.
 *
 * Return value: the #PangoScript for the character.
 *
 * Since: 1.4
 **/
PangoScript
pango_script_for_unichar (gunichar ch)
{
  PangoScript script = g_unichar_get_script (ch);

  return script;
}
/**
 * pango_script_iter_next:
 * @iter: a #PangoScriptIter
 *
 * Advances a #PangoScriptIter to the next range. If @iter
 * is already at the end, it is left unchanged and %FALSE
 * is returned.
 *
 * The new range starts where the previous one ended. Its script code
 * starts out as %PANGO_SCRIPT_COMMON and the end of the range is moved
 * forward one UTF-8 character at a time until either the end of the
 * text is reached or a character is found whose script does not
 * satisfy SAME_SCRIPT() against the script of the range.
 *
 * Return value: %TRUE if @iter was successfully advanced.
 *
 * Since: 1.4
 **/
gboolean
pango_script_iter_next (PangoScriptIter *iter)
{
  /*
   * Stack position below which entries were not pushed during this call.
   * Entries above it may still carry a provisional script code and are
   * patched once the script of the range is known.
   */
  int start_sp;

  /* Already at the end of the text: nothing to advance to */
  if (iter->script_end == iter->text_end)
    return FALSE;

  start_sp = iter->paren_sp;
  iter->script_code = PANGO_SCRIPT_COMMON;
  iter->script_start = iter->script_end;

  for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end))
    {
      gunichar ch = g_utf8_get_char (iter->script_end);
      PangoScript sc;
      int pair_index;

      /* Only characters of the Common script are looked up as paired punctuation */
      sc = g_unichar_get_script (ch);
      if (sc != PANGO_SCRIPT_COMMON)
        pair_index = -1;
      else
        pair_index = get_pair_index (ch);

      /*
       * Paired character handling:
       *
       * if it's an open character, push it onto the stack.
       * if it's a close character, find the matching open on the
       * stack, and use that script code. Any non-matching open
       * characters above it on the stack will be popped.
       */
      if (pair_index >= 0)
        {
          if (IS_OPEN (pair_index))
            {
              /*
               * If the paren stack is full, wrap the stack pointer back
               * to slot 0. This means that deeply nested paired
               * punctuation characters will be ignored, but that's an
               * unusual case, and it's better to ignore them than to
               * write off the end of the stack...
               */
              if (++iter->paren_sp >= PAREN_STACK_DEPTH)
                iter->paren_sp = 0;

              /* Remember the script in effect when the open character was seen */
              iter->paren_stack[iter->paren_sp].pair_index = pair_index;
              iter->paren_stack[iter->paren_sp].script_code = iter->script_code;
            }
          else if (iter->paren_sp >= 0)
            {
              /*
               * The stack stores the index of the open character; a close
               * character matches the entry equal to its own index with
               * the low bit cleared.
               */
              int pi = pair_index & ~1;

              /* Pop entries until the matching open is on top, or the stack is empty */
              while (iter->paren_sp >= 0 && iter->paren_stack[iter->paren_sp].pair_index != pi)
                iter->paren_sp--;

              /* Keep start_sp from pointing above the shrunken stack */
              if (iter->paren_sp < start_sp)
                start_sp = iter->paren_sp;

              /* A matching open was found: the close character takes its script */
              if (iter->paren_sp >= 0)
                sc = iter->paren_stack[iter->paren_sp].script_code;
            }
        }

      if (SAME_SCRIPT (iter->script_code, sc))
        {
          if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
            {
              iter->script_code = sc;

              /*
               * now that we have a final script code, fix any open
               * characters we pushed before we knew the script code.
               */
              while (start_sp < iter->paren_sp)
                iter->paren_stack[++start_sp].script_code = iter->script_code;
            }

          /*
           * if this character is a close paired character,
           * pop it from the stack
           */
          if (pair_index >= 0 && !IS_OPEN (pair_index) && iter->paren_sp >= 0)
            {
              iter->paren_sp--;

              if (iter->paren_sp < start_sp)
                start_sp = iter->paren_sp;
            }
        }
      else
        {
          /* Different script, we're done */
          break;
        }
    }

  /* The range [script_start, script_end) now describes one run */
  return TRUE;
}