/**
 * Collect the phrases in [begin, end) whose pinyin keys match the given key
 * sequence, appending them to pv.
 *
 * The candidate range is narrowed one key position at a time, walking from
 * key_pos back towards key_begin: at each step the range is re-sorted by the
 * key at that position and reduced to the sub-range equal to *key_pos.
 * The key at key_begin itself is not compared here (presumably the caller
 * has already selected on it -- confirm against the call site).  Once
 * key_begin is reached, every remaining entry that is a valid pinyin phrase
 * and whose phrase is enabled is pushed onto pv.
 *
 * Note that the elements of [begin, end) are reordered in place.
 * key_end is accepted for interface symmetry but is not read.
 */
void
PinyinPhraseLib::find_phrases_impl (PhraseVector &pv,
                                    const PinyinPhraseOffsetVector::iterator &begin,
                                    const PinyinPhraseOffsetVector::iterator &end,
                                    const PinyinKeyVector::const_iterator &key_begin,
                                    const PinyinKeyVector::const_iterator &key_pos,
                                    const PinyinKeyVector::const_iterator &key_end)
{
    PinyinPhraseOffsetVector::iterator lo = begin;
    PinyinPhraseOffsetVector::iterator hi = end;
    PinyinKeyVector::const_iterator pos = key_pos;

    // Narrow the candidate range until it is empty or the first key is reached.
    while (lo != hi && pos != key_begin) {
        PinyinPhraseLessThanByOffsetSP by_key (this, m_pinyin_key_less, pos - key_begin);

        std::sort (lo, hi, by_key);

        std::pair<PinyinPhraseOffsetVector::iterator, PinyinPhraseOffsetVector::iterator> matched =
            std::equal_range (lo, hi, *pos, by_key);

        lo = matched.first;
        hi = matched.second;
        --pos;
    }

    // Whatever survived the narrowing is a match; keep the usable ones.
    for (; lo != hi; ++lo) {
        if (valid_pinyin_phrase (lo->first, lo->second) &&
            get_phrase (lo->first).is_enable ())
            pv.push_back (get_phrase (lo->first));
    }
}
// Handles a space-key tap on the phrase column.
//
// Always arms release_space_handler_p with release_space_handler. While
// marking is active nothing else happens. Otherwise:
//   - first tap:  on an empty step (phrase value 0xff) the last entered
//                 phrase for the edited channel is written to the step;
//                 on a filled step that phrase is remembered as the last
//                 entered one.
//   - second tap: new_or_clone() is invoked, told whether the first tap
//                 landed on an empty step.
static void handle_phrase_space() {
    // Survives between the first and the second tap.
    static U8 s_first_tap_was_on_empty_step;
    U8 phrase = get_phrase();

    release_space_handler_p = release_space_handler;

    if (g_marking) {
        return;
    }

    switch (g_space_tap_count) {
    case 1:
        if (phrase == 0xffu) {
            s_first_tap_was_on_empty_step = 1;
            set_phrase(last_input_phrase[EDIT_CH]);
        } else {
            s_first_tap_was_on_empty_step = 0;
            last_input_phrase[EDIT_CH] = phrase;
        }
        break;

    case 2:
        new_or_clone(s_first_tap_was_on_empty_step);
        break;
    }
}
/**
 * Register the (phrase_index, pinyin_index) pair in the per-length index.
 *
 * The pair is filed in m_phrases[len - 1], where len is the phrase length,
 * under the entry whose key exactly equals the pinyin key found at
 * pinyin_index.  If no such entry exists, a new one is created at the
 * position that keeps the table ordered by PinyinKeyExactLessThan.
 *
 * @return false if the pair is not a valid pinyin phrase or the phrase has
 *         zero length; true once the pair has been stored.
 */
bool
PinyinPhraseLib::insert_pinyin_phrase_into_index (uint32 phrase_index, uint32 pinyin_index)
{
    if (!valid_pinyin_phrase (phrase_index, pinyin_index))
        return false;

    uint32 len = get_phrase (phrase_index).length ();

    if (len == 0)
        return false;

    PinyinKey key = get_pinyin_key (pinyin_index);
    PinyinPhraseTable &table = m_phrases [len - 1];

    // First entry whose key is not less than ours.
    PinyinPhraseTable::iterator slot =
        std::lower_bound (table.begin (), table.end (), key, PinyinKeyExactLessThan ());

    const bool at_end = (slot == table.end ());

    if (!at_end && PinyinKeyExactEqualTo () (*slot, key)) {
        // An entry for this key already exists: just extend it.
        slot->get_vector ().push_back (PinyinPhraseOffsetPair (phrase_index, pinyin_index));
        return true;
    }

    PinyinPhraseEntry entry (key);
    entry.get_vector ().push_back (PinyinPhraseOffsetPair (phrase_index, pinyin_index));

    if (at_end)
        table.push_back (entry);
    else
        table.insert (slot, entry);

    return true;
}
void PinyinPhraseLib::dump_content (std::ostream &os, int minlen, int maxlen) { PinyinPhraseLessThanByOffset less_op (this, m_pinyin_key_less); if (minlen < 1) minlen = 1; if (maxlen > SCIM_PHRASE_MAX_LENGTH) maxlen = SCIM_PHRASE_MAX_LENGTH; for (int i = minlen; i <= maxlen; ++ i) { PinyinPhraseOffsetVector offsets; for (PinyinPhraseTable::iterator tit = m_phrases [i-1].begin (); tit != m_phrases [i-1].end (); ++ tit) { PinyinPhraseOffsetVector::iterator begin = tit->get_vector ().begin (); PinyinPhraseOffsetVector::iterator end = tit->get_vector ().end (); offsets.insert (offsets.end (), begin, end); } std::sort (offsets.begin (), offsets.end (), less_op); for (PinyinPhraseOffsetVector::iterator oit = offsets.begin (); oit != offsets.end (); ++ oit) { bool before = false, after = false; os << get_phrase (oit->first).frequency () << "\t"; if (oit > offsets.begin () && get_phrase ((oit-1)->first) == get_phrase (oit->first)) before = true; if (oit < offsets.end () - 1 && get_phrase ((oit+1)->first) == get_phrase (oit->first)) after = true; if (before || after) os << "+"; else os << "-"; os << utf8_wcstombs (get_phrase (oit->first).get_content ()); os << " ="; for (unsigned int j = 0; j < get_phrase (oit->first).length (); ++ j) os << " " << get_pinyin_key (oit->second + j); os << "\n"; } } }
// Returns what get_phrase() yields for the row after the current one,
// leaving CUR_ROW_CHAIN as it was. When CUR_ROW_CHAIN is already 0xf there
// is no following row to look at and 0xff is returned instead.
U8 get_next_phrase() {
    U8 result = 0xffu;

    if (CUR_ROW_CHAIN != 0xf) {
        // Peek: step forward, read, step back.
        ++CUR_ROW_CHAIN;
        result = get_phrase();
        --CUR_ROW_CHAIN;
    }

    return result;
}
// Returns what get_phrase() yields for the row before the current one,
// leaving CUR_ROW_CHAIN as it was. When CUR_ROW_CHAIN is 0 there is no
// preceding row to look at and 0xff is returned instead.
U8 get_prev_phrase() {
    U8 result = 0xffu;

    if (CUR_ROW_CHAIN) {
        // Peek: step back, read, step forward.
        --CUR_ROW_CHAIN;
        result = get_phrase();
        ++CUR_ROW_CHAIN;
    }

    return result;
}
/**
 * Read the next delimited token from *next as a string.
 *
 * Leading whitespace is skipped with trim_left(), then get_phrase() is asked
 * to isolate the token (presumably terminating it in place and advancing
 * *next past it -- confirm against get_phrase()).
 *
 * @param next   in/out cursor into the text being parsed
 * @param buf    destination; receives a copy of the token.  No length is
 *               passed, so the caller must provide a buffer large enough
 *               for the longest possible token.
 * @param delim  token delimiter forwarded to get_phrase()
 * @return -2 if get_phrase() reports failure, 0 otherwise.  When the token
 *         is empty or starts with '*', buf is left untouched.
 */
int
bos_reader::get_phrase_str (char **next, char *buf, char delim) const
{
  trim_left (next);
  char *token = *next;

  if (get_phrase (next, delim))
    return -2;

  // '*' and the empty token both mean "nothing to store".
  if (*token != '*' && *token != '\0')
    strcpy (buf, token);

  return 0;
}
int bos_reader::get_phrase_int (char **next, t_long *i, char delim) const { trim_left (next); char *start = *next; long t; if (get_phrase (next, delim)) return -2; if (*start == '*' || *start == '\0') return 0; if (sscanf (start, "%ld", &t) < 1) { fprintf (stderr, "Error: can not read int from %s\n", start); return -1; } *i = (t_long)t; return 0; }
// Puts a phrase on the current step and makes it the last entered phrase
// for the edited channel.
//
// do_new != 0: the step held no phrase, so a new one is taken from
//              next_new_phrase().
// do_new == 0: the phrase already on the step is duplicated with
//              clone_phrase().
//
// If the chosen helper returns 0xff nothing is changed.
static void new_or_clone(U8 do_new) {
    const U8 phrase = do_new ? next_new_phrase()
                             : clone_phrase(get_phrase());

    if (phrase != 0xffu) {
        last_input_phrase[EDIT_CH] = phrase;
        alloc_phrase(phrase);
        set_phrase(phrase);
    }
}
int bos_reader::get_phrase_double (char **next, t_double *d, char delim) const { trim_left (next); char *start = *next; double t; if (get_phrase (next, delim)) { //printf ("get_phrase kkkkkk\n"); return -2; } if (*start == '*' || *start == '\0') return 0; if (sscanf (start, "%lf", &t) < 1) { fprintf (stderr, "Error: can not read double from %s\n", start); return -1; } *d = (t_double)t; return 0; }
/*
 * Read the next token of the query string and return it wrapped in a
 * one-element list built with cons(q, token, NIL).
 *
 * Returns NIL without building a list when:
 *   - the expression or cell budget of q is exhausted, or the cursor has
 *     reached str_end;
 *   - the next character is '\0';
 *   - the next character is a closing parenthesis (it is consumed);
 *   - an opening quote is seen but get_phrase() yields no phrase.
 *
 * Otherwise the leading character selects the token kind: quoted phrase,
 * prefix operator, AND / BUT, weight adjustments, a parenthesised
 * sub-expression, or a plain word.  A plain word that is exactly "OR" is
 * discarded and replaced by an OR operator token.
 */
inline static grn_cell *
get_token(grn_ctx *ctx, grn_query *q)
{
  grn_cell *token = NIL;
  grn_operator op = q->default_op;
  int weight = DEFAULT_WEIGHT, prefixp = 0, mode = -1, option = 0;

  skip_space(ctx, q);
  if (q->cur_expr >= q->max_exprs ||
      q->cur_cell >= q->max_cells ||
      q->cur >= q->str_end) {
    return NIL;
  }

  switch (*q->cur) {
  case '\0' :
    return NIL;
  case GRN_QUERY_PARENR :
    q->cur++;
    return NIL;
  case GRN_QUERY_QUOTEL :
    q->cur++;
    if ((token = get_phrase(ctx, q)) == NULL) {
      return NIL;
    }
    break;
  case GRN_QUERY_PREFIX :
    q->cur++;
    token = get_op(q, op, weight);
    break;
  case GRN_QUERY_AND :
    q->cur++;
    token = op_new(q, GRN_OP_AND, weight, mode, option);
    break;
  case GRN_QUERY_BUT :
    q->cur++;
    token = op_new(q, GRN_OP_BUT, weight, mode, option);
    break;
  case GRN_QUERY_ADJ_INC :
    q->cur++;
    if (weight < 127) { weight++; }
    token = op_new(q, GRN_OP_ADJUST, weight, mode, option);
    break;
  case GRN_QUERY_ADJ_DEC :
    q->cur++;
    if (weight > -128) { weight--; }
    token = op_new(q, GRN_OP_ADJUST, weight, mode, option);
    break;
  case GRN_QUERY_ADJ_NEG :
    q->cur++;
    token = op_new(q, GRN_OP_ADJUST, -1, mode, option);
    break;
  case GRN_QUERY_PARENL :
    q->cur++;
    token = get_expr(ctx, q);
    break;
  default :
    token = get_word(ctx, q, &prefixp);
    /*
     * Check the length before looking at the bytes: a one-byte word must
     * not have value[1] read, as that lies outside the word.
     */
    if (token &&
        token->u.b.size == 2 &&
        token->u.b.value[0] == 'O' &&
        token->u.b.value[1] == 'R') {
      /* The word was the OR keyword: drop it and emit an operator instead. */
      cell_del(q);
      q->cur_expr--;
      token = op_new(q, GRN_OP_OR, weight, mode, option);
    }
    break;
  }

  return cons(q, token, NIL);
}
static void release_space_handler() { alloc_phrase(get_phrase()); release_space_handler_p = 0; }
void PinyinPhraseLib::refine_pinyin_lib () { PinyinKeyVector tmp_pinyin_lib; PinyinKeyVector::const_iterator result; PinyinKeyVector::const_iterator vit_begin; PinyinKeyVector::const_iterator vit_end; uint32 len; uint32 pinyin_offset; tmp_pinyin_lib.reserve (m_pinyin_lib.size () + 1); for (int i=SCIM_PHRASE_MAX_LENGTH-1; i>=0; i--) { for (PinyinPhraseTable::iterator tit=m_phrases[i].begin(); tit!=m_phrases[i].end(); tit++) { for (PinyinPhraseOffsetVector::iterator vit=tit->get_vector ().begin(); vit!=tit->get_vector ().end(); vit++) { len = get_phrase (vit->first).length (); if (len > 0) { vit_begin = m_pinyin_lib.begin () + vit->second; vit_end = vit_begin + len; for (result = tmp_pinyin_lib.begin (); result != tmp_pinyin_lib.end (); result ++) { uint32 j; for (j=0; j< len && result + j < tmp_pinyin_lib.end (); j++) { if (!m_pinyin_key_equal (*(result+j), *(vit_begin + j))) break; } if (j == len) break; } /* result = std::find_end (tmp_pinyin_lib.begin (), tmp_pinyin_lib.end (), vit_begin, vit_end, m_pinyin_key_equal); */ if (result != tmp_pinyin_lib.end ()) pinyin_offset = result - tmp_pinyin_lib.begin (); else { pinyin_offset = tmp_pinyin_lib.size (); for (uint32 j=0; j<len; j++) tmp_pinyin_lib.push_back (get_pinyin_key (vit->second + j)); } vit->second = pinyin_offset; } std::cout << "." << std::flush; } } } std::cout << "\n"; m_pinyin_lib = tmp_pinyin_lib; }