示例#1
0
//!
//! @brief Tells whether a string is made up solely of Common/Latin script characters
//! @param TEXT A NUL-terminated UTF-8 string to inspect, or NULL
//! @returns FALSE if TEXT is NULL or contains a character whose Unicode script
//!          is neither G_UNICODE_SCRIPT_COMMON nor G_UNICODE_SCRIPT_LATIN;
//!          TRUE otherwise (including for the empty string)
//!
static gboolean
gw_spellcheck_is_common_script (const gchar *TEXT)
{
    const gchar *cursor;

    if (TEXT == NULL) return FALSE;

    //Walk the string one UTF-8 character at a time and bail out on the first offender
    for (cursor = TEXT; *cursor != '\0'; cursor = g_utf8_next_char (cursor))
    {
      GUnicodeScript found = g_unichar_get_script (g_utf8_get_char (cursor));
      gboolean is_allowed = (found == G_UNICODE_SCRIPT_COMMON || found == G_UNICODE_SCRIPT_LATIN);

      if (!is_allowed) return FALSE;
    }

    return TRUE;
}
示例#2
0
文件: index.c 项目: zakkudo/gwaei
///!
///! @brief This allows searching for fish, and getting bigfish back if the morphology parser doesn't catch it
///!
///! Walks substrings of KEY (suffixes obtained by dropping leading characters,
///! then prefixes obtained by dropping trailing characters). For each one that
///! is already a key in index->table[type], it calls
///! _lw_index_concat_masterkey_to_subkey() with KEY and that substring.
///! A substring is only considered when it is non-empty and either holds more
///! than two characters or starts with a character of the Han script.
///!
///! @param index The index to update; must not be NULL
///! @param type  Selects which of the index's tables is consulted
///! @param KEY   The full key whose substrings are examined; must not be NULL
///!
static void
_lw_index_index_subkeys_by_type (LwIndex          *index,
                                 LwIndexTableType  type,
                                 const gchar      *KEY)
{
    //Sanity checks (this function returns void, so the value-less macro is required)
    g_return_if_fail (index != NULL);
    g_return_if_fail (KEY != NULL);

    //Declarations
    gchar *key_copy = NULL;
    gchar *ptr = NULL;
    const gchar *subkey = NULL;

    //Initializations
    key_copy = g_strdup (KEY);
    if (key_copy == NULL) goto errored;

    //NOTE(review): ptr is advanced once here and again at the top of the loop
    //below, so the suffix that drops only the first character is never
    //tested. Confirm whether that is intentional.
    ptr = key_copy;
    if (*ptr != '\0') ptr = g_utf8_next_char (ptr);

    //Search for substrings by removing the front characters
    while (ptr != NULL && *ptr != '\0')
    {
      ptr = g_utf8_next_char (ptr);
      subkey = ptr;

      GUnicodeScript script = g_unichar_get_script (g_utf8_get_char (subkey));
      //g_utf8_strlen() is capped at 9 bytes; only "more than 2 characters" matters here
      gboolean has_relevance = (*subkey != '\0' && (g_utf8_strlen (subkey, 9) > 2 || script == G_UNICODE_SCRIPT_HAN));
      if (has_relevance && g_hash_table_contains (index->table[type], subkey))
      {
        _lw_index_concat_masterkey_to_subkey (index, type, KEY, subkey);
      }
    }

    //Search for substring by removing the back characters
    //ptr now sits on the terminating NUL of key_copy; each pass truncates the
    //copy by one more trailing character until nothing is left.
    subkey = key_copy;
    GUnicodeScript script = g_unichar_get_script (g_utf8_get_char (subkey));

    while (ptr > subkey)
    {
      ptr = g_utf8_prev_char (ptr);
      *ptr = '\0';

      gboolean has_relevance = (*subkey != '\0' && (g_utf8_strlen (subkey, 9) > 2 || script == G_UNICODE_SCRIPT_HAN));
      if (has_relevance && g_hash_table_contains (index->table[type], subkey))
      {
        _lw_index_concat_masterkey_to_subkey (index, type, KEY, subkey);
      }
    }

errored:

    //g_free() accepts NULL, so no guard is needed
    g_free (key_copy);
    key_copy = NULL;
}
示例#3
0
/*
 * Estimate how often neighbouring letters in a text part switch character
 * class (byte-wise path) or Unicode script (decoded path).
 *
 * Returns TRUE when the ratio of mismatching pairs to all examined pairs is
 * strictly greater than chartable_module_ctx->threshold. Returns FALSE when
 * no pair was examined, or when the decoded path meets a sequence that
 * g_utf8_get_char_validated() rejects (in which case part->script is left
 * untouched). On the decoded path part->script is set to the most frequently
 * tallied script.
 */
static gboolean
check_part (struct mime_text_part *part, gboolean raw_mode)
{
	guchar *cur, *next;
	gunichar first, second;
	GUnicodeScript first_script, second_script;
	guint32 mismatched = 0, pairs = 0, best = 0, idx;
	guint32 left = part->content->len;
	guint32 tally[G_UNICODE_SCRIPT_NKO];
	GUnicodeScript dominant = 0;

	cur = part->content->data;

	if (IS_PART_UTF (part) || raw_mode) {
		/* Byte-wise scan: look at every adjacent pair of bytes */
		for (; left > 1; cur++, left--) {
			gboolean cur_alpha = g_ascii_isalpha (cur[0]);
			gboolean cur_high = (cur[0] & 0x80) != 0;
			gboolean nxt_alpha = g_ascii_isalpha (cur[1]);
			gboolean nxt_high = (cur[1] & 0x80) != 0;

			if ((cur_alpha && nxt_high) || (cur_high && nxt_alpha)) {
				/* An ASCII letter sits next to a byte with the high bit set */
				mismatched++;
				pairs++;
			}
			else if ((cur_high && nxt_high) || (cur_alpha && nxt_alpha)) {
				/* Both bytes belong to the same class */
				pairs++;
			}
		}
	}
	else {
		memset (&tally, 0, sizeof (tally));

		/*
		 * NOTE(review): characters are consumed two at a time, so only the
		 * first of each pair is tallied and the boundary between one pair
		 * and the next is never compared - confirm this is intended.
		 */
		while (left > 0) {
			first = g_utf8_get_char_validated (cur, left);
			if (first == (gunichar) - 1 || first == (gunichar) - 2) {
				/* Undecodable input: give up on this part */
				return FALSE;
			}

			first_script = g_unichar_get_script (first);
			if (first_script < (gint)G_N_ELEMENTS (tally)) {
				tally[first_script]++;
			}
			next = g_utf8_next_char (cur);
			left -= next - cur;
			cur = next;

			if (left == 0) {
				break;
			}

			second = g_utf8_get_char_validated (cur, left);
			if (second == (gunichar) - 1 || second == (gunichar) - 2) {
				/* Undecodable input: give up on this part */
				return FALSE;
			}

			second_script = g_unichar_get_script (second);
			if (g_unichar_isalpha (first) && g_unichar_isalpha (second)) {
				/* Two alphabetic characters in a row: compare their scripts */
				pairs++;
				if (second_script != first_script) {
					mismatched++;
				}
			}
			next = g_utf8_next_char (cur);
			left -= next - cur;
			cur = next;
		}

		/* Pick the script with the highest tally (first one wins on ties) */
		for (idx = 0; idx < G_N_ELEMENTS (tally); idx++) {
			if (tally[idx] > best) {
				best = tally[idx];
				dominant = idx;
			}
		}
		part->script = dominant;
	}

	if (pairs == 0) {
		return FALSE;
	}

	return ((double)mismatched / (double)pairs) > chartable_module_ctx->threshold;
}
示例#4
0
/**
 * pango_script_for_unichar:
 * @ch: a Unicode character
 *
 * Maps a character to its #PangoScript (as defined by Unicode Standard
 * Annex #24). @ch is not checked for being a valid Unicode character;
 * the result is undefined if an invalid character is passed in.
 *
 * As of Pango 1.18, this function is a plain forwarder that hands back
 * whatever g_unichar_get_script() reports for @ch.
 *
 * Return value: the #PangoScript for the character.
 *
 * Since: 1.4
 **/
PangoScript
pango_script_for_unichar (gunichar ch)
{
  GUnicodeScript glib_script = g_unichar_get_script (ch);

  return (PangoScript) glib_script;
}
示例#5
0
/**
 * pango_script_iter_next:
 * @iter: a #PangoScriptIter
 *
 * Advances a #PangoScriptIter to the next range. If @iter
 * is already at the end, it is left unchanged and %FALSE
 * is returned.
 *
 * The new range starts where the previous one ended and is extended
 * one character at a time for as long as each character's script is
 * compatible (per SAME_SCRIPT) with the script of the range so far.
 * Paired punctuation is resolved through @iter's paren stack so that a
 * closing character takes the script recorded for its opener.
 *
 * Return value: %TRUE if @iter was successfully advanced.
 *
 * Since: 1.4
 **/
gboolean
pango_script_iter_next (PangoScriptIter *iter)
{
  /* Lowest paren-stack position seen during this run; entries above it
   * are the ones whose script code may still need fixing up. */
  int start_sp;

  /* Nothing left to iterate over */
  if (iter->script_end == iter->text_end)
    return FALSE;

  /* Begin a fresh run at the end of the previous one; its script stays
   * COMMON until a character with a real script is met. */
  start_sp = iter->paren_sp;
  iter->script_code = PANGO_SCRIPT_COMMON;
  iter->script_start = iter->script_end;

  for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end))
    {
      gunichar ch = g_utf8_get_char (iter->script_end);
      PangoScript sc;
      int pair_index;

      /* Only characters of the COMMON script are looked up as paired
       * punctuation; everything else is marked as "not a pair" (-1). */
      sc = g_unichar_get_script (ch);
      if (sc != PANGO_SCRIPT_COMMON)
	pair_index = -1;
      else
	pair_index = get_pair_index (ch);

      /*
       * Paired character handling:
       *
       * if it's an open character, push it onto the stack.
       * if it's a close character, find the matching open on the
       * stack, and use that script code. Any non-matching open
       * characters above it on the stack will be popped.
       */
      if (pair_index >= 0)
	{
	  if (IS_OPEN (pair_index))
	    {
	      /*
	       * If the paren stack is full, empty it. This
	       * means that deeply nested paired punctuation
	       * characters will be ignored, but that's an unusual
	       * case, and it's better to ignore them than to
	       * write off the end of the stack...
	       */
	      if (++iter->paren_sp >= PAREN_STACK_DEPTH)
		iter->paren_sp = 0;

	      /* Record the opener together with the run's script as known
	       * so far (possibly still COMMON; fixed up further below). */
	      iter->paren_stack[iter->paren_sp].pair_index = pair_index;
	      iter->paren_stack[iter->paren_sp].script_code = iter->script_code;
	    }
	  else if (iter->paren_sp >= 0)
	    {
	      /* Clear the low bit to get the index stored for the opener.
	       * NOTE(review): presumably open/close entries are adjacent
	       * with the opener at the even index - confirm against
	       * IS_OPEN and the pair table. */
	      int pi = pair_index & ~1;

	      /* Discard stack entries until the matching opener (or the
	       * bottom of the stack) is reached. */
	      while (iter->paren_sp >= 0 && iter->paren_stack[iter->paren_sp].pair_index != pi)
		iter->paren_sp--;

	      if (iter->paren_sp < start_sp)
		start_sp = iter->paren_sp;

	      /* A matching opener was found: the closer inherits its script */
	      if (iter->paren_sp >= 0)
		sc = iter->paren_stack[iter->paren_sp].script_code;
	    }
	}

      if (SAME_SCRIPT (iter->script_code, sc))
	{
	  /* First character with a real script decides the run's script */
	  if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
	    {
	      iter->script_code = sc;

	      /*
	       * now that we have a final script code, fix any open
	       * characters we pushed before we knew the script code.
	       */
	      while (start_sp < iter->paren_sp)
		iter->paren_stack[++start_sp].script_code = iter->script_code;
	    }

	  /*
	   * if this character is a close paired character,
	   * pop it from the stack
	   */
	  if (pair_index >= 0 && !IS_OPEN (pair_index) && iter->paren_sp >= 0)
	    {
	      iter->paren_sp--;

	      if (iter->paren_sp < start_sp)
		start_sp = iter->paren_sp;
	    }
	}
      else
	{
	  /* Different script, we're done */
	  break;
	}
    }

  return TRUE;
}