Пример #1
0
/**
 * Narrows the offset range [begin, end) key by key, walking from key_pos back
 * towards key_begin, and appends every surviving phrase that is valid and
 * enabled to pv.
 *
 * For each key position strictly after key_begin the current range is sorted
 * with PinyinPhraseLessThanByOffsetSP for that position and then reduced to
 * the sub-range equal to *key_pos under the same comparator. The key at
 * key_begin itself is not compared here (presumably the caller has already
 * matched it -- confirm against the call site).
 *
 * Note that the elements of [begin, end) are reordered in place by the sort.
 *
 * @param pv        Receives the matching phrases (appended).
 * @param begin     Start of the candidate offset range.
 * @param end       End of the candidate offset range.
 * @param key_begin First key of the key sequence; iteration stops here.
 * @param key_pos   Key position to start matching from (moves backwards).
 * @param key_end   End of the key sequence; not consulted by this function.
 */
void
PinyinPhraseLib::find_phrases_impl (PhraseVector &pv,
									const PinyinPhraseOffsetVector::iterator &begin,
									const PinyinPhraseOffsetVector::iterator &end,
									const PinyinKeyVector::const_iterator &key_begin,
									const PinyinKeyVector::const_iterator &key_pos,
									const PinyinKeyVector::const_iterator &key_end)
{
	(void) key_end;

	PinyinPhraseOffsetVector::iterator first = begin;
	PinyinPhraseOffsetVector::iterator last  = end;
	PinyinKeyVector::const_iterator    pos   = key_pos;

	// Filter by one key per pass until either nothing is left or every key
	// after key_begin has been applied.
	while (first != last && pos != key_begin) {
		PinyinPhraseLessThanByOffsetSP by_key (this, m_pinyin_key_less, pos - key_begin);

		std::sort (first, last, by_key);

		std::pair<PinyinPhraseOffsetVector::iterator, PinyinPhraseOffsetVector::iterator> matched =
			std::equal_range (first, last, *pos, by_key);

		first = matched.first;
		last  = matched.second;
		--pos;
	}

	// Whatever remains matched every key examined; keep the usable phrases.
	for (; first != last; ++first) {
		if (!valid_pinyin_phrase (first->first, first->second))
			continue;
		if (get_phrase (first->first).is_enable ())
			pv.push_back (get_phrase (first->first));
	}
}
Пример #2
0
// Handles a space tap on the phrase column.
//
// Always arms release_space_handler_p with release_space_handler. Unless
// g_marking is set, the action then depends on g_space_tap_count:
//   1: on an empty step (get_phrase() == 0xff) the step is set to the last
//      input phrase for EDIT_CH; otherwise the step's phrase is remembered
//      as the last input phrase.
//   2: new_or_clone() is called with the empty-step flag recorded on tap 1.
static void handle_phrase_space()
{
    // Persists between calls so that tap 2 can see what tap 1 found.
    static U8 s_tap1_hit_empty_step;
    U8 current = get_phrase();

    release_space_handler_p = release_space_handler;

    if (g_marking)
        return;

    switch (g_space_tap_count)
    {
        case 1:
            s_tap1_hit_empty_step = (current == 0xffu);

            if (!s_tap1_hit_empty_step)
                last_input_phrase[EDIT_CH] = current;
            else
                set_phrase(last_input_phrase[EDIT_CH]);
            break;

        case 2:
            new_or_clone(s_tap1_hit_empty_step);
            break;
    }
}
Пример #3
0
/**
 * Registers the (phrase_index, pinyin_index) pair in the per-length index
 * table m_phrases[len - 1], where len is the phrase length.
 *
 * The table is searched with std::lower_bound under PinyinKeyExactLessThan
 * for the key found at pinyin_index. If an entry with exactly that key
 * exists the pair is appended to it; otherwise a new entry holding the pair
 * is placed at the lower-bound position.
 *
 * @return false if the pair is not a valid pinyin phrase or the phrase has
 *         zero length, true once the pair has been stored.
 */
bool
PinyinPhraseLib::insert_pinyin_phrase_into_index (uint32 phrase_index, uint32 pinyin_index)
{
	if (!valid_pinyin_phrase (phrase_index, pinyin_index))
		return false;

	const uint32 len = get_phrase (phrase_index).length();

	if (len == 0)
		return false;

	const uint32 slot = len - 1;
	PinyinKey key = get_pinyin_key (pinyin_index);

	PinyinPhraseTable::iterator pos =
		std::lower_bound (m_phrases[slot].begin (), m_phrases[slot].end (), key, PinyinKeyExactLessThan ());

	const bool at_end = (pos == m_phrases[slot].end ());

	// An entry for this exact key already exists: just extend it.
	if (!at_end && PinyinKeyExactEqualTo () (*pos, key)) {
		pos->get_vector ().push_back (PinyinPhraseOffsetPair (phrase_index, pinyin_index));
		return true;
	}

	PinyinPhraseEntry fresh (key);
	fresh.get_vector ().push_back (PinyinPhraseOffsetPair (phrase_index, pinyin_index));

	// pos comes from lower_bound on this table, so it is either a position
	// inside the table or end(); insert in the former case, append otherwise.
	if (at_end)
		m_phrases[slot].push_back (fresh);
	else
		m_phrases[slot].insert (pos, fresh);

	return true;
}
Пример #4
0
void
PinyinPhraseLib::dump_content (std::ostream &os, int minlen, int maxlen)
{
	PinyinPhraseLessThanByOffset less_op (this, m_pinyin_key_less);
	if (minlen < 1) minlen = 1;
	if (maxlen > SCIM_PHRASE_MAX_LENGTH) maxlen = SCIM_PHRASE_MAX_LENGTH;

	for (int i = minlen; i <= maxlen; ++ i) {
		PinyinPhraseOffsetVector offsets;
		for (PinyinPhraseTable::iterator tit = m_phrases [i-1].begin (); tit != m_phrases [i-1].end (); ++ tit) {
			PinyinPhraseOffsetVector::iterator begin = tit->get_vector ().begin ();
			PinyinPhraseOffsetVector::iterator end = tit->get_vector ().end ();
			offsets.insert (offsets.end (), begin, end);
		}

		std::sort (offsets.begin (), offsets.end (), less_op);

		for (PinyinPhraseOffsetVector::iterator oit = offsets.begin (); oit != offsets.end (); ++ oit) {
			bool before = false, after = false;

			os << get_phrase (oit->first).frequency () << "\t";
			if (oit > offsets.begin () && get_phrase ((oit-1)->first) == get_phrase (oit->first)) before = true;
			if (oit < offsets.end () - 1 && get_phrase ((oit+1)->first) == get_phrase (oit->first)) after = true;
			if (before || after) os << "+";
			else os << "-";
			os << utf8_wcstombs (get_phrase (oit->first).get_content ());
			os << " =";
			for (unsigned int j = 0; j < get_phrase (oit->first).length (); ++ j)
				os << " " << get_pinyin_key (oit->second + j);
			os << "\n";
		}
	}
}
Пример #5
0
// Returns what get_phrase() reports with CUR_ROW_CHAIN temporarily advanced
// by one, restoring CUR_ROW_CHAIN afterwards. Yields 0xff without calling
// get_phrase() when CUR_ROW_CHAIN is already 0xf.
U8 get_next_phrase()
{
    U8 next_phrase = 0xffu;

    if (CUR_ROW_CHAIN != 0xf)
    {
        ++CUR_ROW_CHAIN;
        next_phrase = get_phrase();
        --CUR_ROW_CHAIN;
    }

    return next_phrase;
}
Пример #6
0
// Returns what get_phrase() reports with CUR_ROW_CHAIN temporarily moved
// back by one, restoring CUR_ROW_CHAIN afterwards. Yields 0xff without
// calling get_phrase() when CUR_ROW_CHAIN is already zero.
U8 get_prev_phrase()
{
    U8 prev_phrase = 0xffu;

    if (CUR_ROW_CHAIN)
    {
        --CUR_ROW_CHAIN;
        prev_phrase = get_phrase();
        ++CUR_ROW_CHAIN;
    }

    return prev_phrase;
}
Пример #7
0
 // Reads the next token from *next as a string into buf.
 //
 // After trim_left(), the token start is remembered and get_phrase() is asked
 // to process the input with the given delimiter (presumably it terminates
 // the token and advances *next -- confirm against get_phrase()).
 //
 // Returns -2 if get_phrase() reports failure, 0 otherwise. buf is left
 // untouched when the token is empty or begins with '*'.
 //
 // NOTE(review): the copy is an unbounded strcpy; buf must be large enough
 // for the whole token.
 int
 bos_reader::get_phrase_str (char **next, char *buf, char delim) const
 {
   trim_left (next);
   char *const token = *next;

   if (get_phrase (next, delim))
     return -2;

   const bool keep_current = (*token == '*' || *token == '\0');
   if (!keep_current)
     strcpy (buf, token);

   return 0;
 }
Пример #8
0
 // Reads the next token from *next as an integer into *i.
 //
 // After trim_left(), the token start is remembered and get_phrase() is asked
 // to process the input with the given delimiter (presumably it terminates
 // the token and advances *next -- confirm against get_phrase()).
 //
 // Returns -2 if get_phrase() reports failure, -1 (with a message on stderr)
 // if the token cannot be parsed by sscanf "%ld", and 0 otherwise. *i is left
 // untouched when the token is empty or begins with '*'.
 int
 bos_reader::get_phrase_int (char **next, t_long *i, char delim) const
 {
   trim_left (next);
   char *const token = *next;

   if (get_phrase (next, delim))
     return -2;

   // Empty token or '*': keep whatever the caller already stored in *i.
   if (*token == '*' || *token == '\0')
     return 0;

   long parsed;
   if (sscanf (token, "%ld", &parsed) >= 1)
     {
       *i = (t_long)parsed;
       return 0;
     }

   fprintf (stderr, "Error: can not read int from %s\n", token);
   return -1;
 }
Пример #9
0
// Puts a newly obtained phrase on the current step.
//
// When do_new is non-zero the phrase comes from next_new_phrase(); otherwise
// it is the result of clone_phrase() applied to the step's current phrase.
// A result of 0xff is treated as "nothing available" and nothing is changed.
// On success the phrase is recorded as the last input phrase for EDIT_CH,
// passed to alloc_phrase() and written to the step with set_phrase().
static void new_or_clone(U8 do_new)
{
    const U8 target = do_new ? next_new_phrase()
                             : clone_phrase(get_phrase());

    if (target != 0xffu)
    {
        last_input_phrase[EDIT_CH] = target;
        alloc_phrase(target);
        set_phrase(target);
    }
}
Пример #10
0
 // Reads the next token from *next as a floating-point value into *d.
 //
 // After trim_left(), the token start is remembered and get_phrase() is asked
 // to process the input with the given delimiter (presumably it terminates
 // the token and advances *next -- confirm against get_phrase()).
 //
 // Returns -2 if get_phrase() reports failure, -1 (with a message on stderr)
 // if the token cannot be parsed by sscanf "%lf", and 0 otherwise. *d is left
 // untouched when the token is empty or begins with '*'.
 int
 bos_reader::get_phrase_double (char **next, t_double *d, char delim) const
 {
   trim_left (next);
   char *const token = *next;

   if (get_phrase (next, delim))
     return -2;

   // Empty token or '*': keep whatever the caller already stored in *d.
   if (*token == '*' || *token == '\0')
     return 0;

   double parsed;
   if (sscanf (token, "%lf", &parsed) >= 1)
     {
       *d = (t_double)parsed;
       return 0;
     }

   fprintf (stderr, "Error: can not read double from %s\n", token);
   return -1;
 }
Пример #11
0
/*
 * Reads the next token of the query at q->cur and returns it wrapped by
 * cons(q, token, NIL).
 *
 * Returns NIL (without calling cons) when:
 *   - the expression or cell budget is exhausted, or the cursor has reached
 *     q->str_end;
 *   - the current character is '\0';
 *   - the current character is GRN_QUERY_PARENR (the cursor is advanced past
 *     it first);
 *   - a quoted phrase was opened but get_phrase() returned NULL.
 *
 * Otherwise the token is produced according to the leading character:
 * a quoted phrase, an operator parsed by get_op(), one of the fixed
 * operators built with op_new(), a parenthesised sub-expression from
 * get_expr(), or a plain word from get_word(). A plain word that is exactly
 * the two bytes "OR" is replaced by a GRN_OP_OR operator token.
 */
inline static grn_cell *
get_token(grn_ctx *ctx, grn_query *q)
{
  grn_cell *token = NIL;
  grn_operator op = q->default_op;
  {
    int weight = DEFAULT_WEIGHT, prefixp = 0, mode = -1, option = 0;
    skip_space(ctx, q);
    if (q->cur_expr >= q->max_exprs ||
        q->cur_cell >= q->max_cells ||
        q->cur >= q->str_end) { return NIL; }
    switch (*q->cur) {
    case '\0' :
      return NIL;
    case GRN_QUERY_PARENR :
      q->cur++;
      return NIL;
    case GRN_QUERY_QUOTEL :
      q->cur++;
      if ((token = get_phrase(ctx, q)) == NULL) {
        return NIL;
      }
      break;
    case GRN_QUERY_PREFIX :
      q->cur++;
      token = get_op(q, op, weight);
      break;
    case GRN_QUERY_AND :
      q->cur++;
      token = op_new(q, GRN_OP_AND, weight, mode, option);
      break;
    case GRN_QUERY_BUT :
      q->cur++;
      token = op_new(q, GRN_OP_BUT, weight, mode, option);
      break;
    case GRN_QUERY_ADJ_INC :
      q->cur++;
      /* Keep the adjusted weight within -128..127. */
      if (weight < 127) { weight++; }
      token = op_new(q, GRN_OP_ADJUST, weight, mode, option);
      break;
    case GRN_QUERY_ADJ_DEC :
      q->cur++;
      if (weight > -128) { weight--; }
      token = op_new(q, GRN_OP_ADJUST, weight, mode, option);
      break;
    case GRN_QUERY_ADJ_NEG :
      q->cur++;
      token = op_new(q, GRN_OP_ADJUST, -1, mode, option);
      break;
    case GRN_QUERY_PARENL :
      q->cur++;
      token = get_expr(ctx, q);
      break;
    default :
      /*
       * Check the length before looking at the bytes so that a one-byte
       * word never causes a read of value[1] beyond the token.
       */
      if ((token = get_word(ctx, q, &prefixp)) &&
          token->u.b.size == 2 &&
          token->u.b.value[0] == 'O' &&
          token->u.b.value[1] == 'R') {
        /*
         * The word was the OR keyword: undo the word allocation
         * (presumably what cell_del and the cur_expr decrement do --
         * confirm) and emit an operator token instead.
         */
        cell_del(q);
        q->cur_expr--;
        token = op_new(q, GRN_OP_OR, weight, mode, option);
      }
      break;
    }
  }
  return cons(q, token, NIL);
}
Пример #12
0
static void release_space_handler()
{
    alloc_phrase(get_phrase());
    release_space_handler_p = 0;
}
Пример #13
0
void
PinyinPhraseLib::refine_pinyin_lib ()
{
	PinyinKeyVector tmp_pinyin_lib;

	PinyinKeyVector::const_iterator result;
	PinyinKeyVector::const_iterator vit_begin;
	PinyinKeyVector::const_iterator vit_end;

	uint32 len;
	uint32 pinyin_offset;

	tmp_pinyin_lib.reserve (m_pinyin_lib.size () + 1);

	for (int i=SCIM_PHRASE_MAX_LENGTH-1; i>=0; i--) {
		for (PinyinPhraseTable::iterator tit=m_phrases[i].begin(); tit!=m_phrases[i].end(); tit++) {
			for (PinyinPhraseOffsetVector::iterator vit=tit->get_vector ().begin();
					vit!=tit->get_vector ().end(); vit++) {
				len = get_phrase (vit->first).length ();

				if (len > 0) {
					vit_begin = m_pinyin_lib.begin () + vit->second;
					vit_end   = vit_begin + len;

					for (result  = tmp_pinyin_lib.begin ();
						 result != tmp_pinyin_lib.end ();
						 result ++) {
						uint32 j;
						for (j=0; j< len && result + j < tmp_pinyin_lib.end (); j++) {
							if (!m_pinyin_key_equal (*(result+j), *(vit_begin + j)))
								break;
						}
						if (j == len)
							break;
					}

					/*
					result = std::find_end (tmp_pinyin_lib.begin (),
										  tmp_pinyin_lib.end (),
										  vit_begin,
										  vit_end,
										  m_pinyin_key_equal);
					*/

					if (result != tmp_pinyin_lib.end ())
						pinyin_offset = result - tmp_pinyin_lib.begin ();
					else {
						pinyin_offset = tmp_pinyin_lib.size ();
						for (uint32 j=0; j<len; j++)
							tmp_pinyin_lib.push_back (get_pinyin_key (vit->second + j));
					}
					vit->second = pinyin_offset;
				}
				std::cout << "." << std::flush;
			}
		}	
	}

	std::cout << "\n";

	m_pinyin_lib = tmp_pinyin_lib;
}