/**
 * Check whether the core can handle a word (ci).
 *
 * The word counts as handled when either
 *   - it appears among the word candidates found for the syllables, or
 *   - no word candidates exist at all and the first ICW candidate equals it.
 *
 * @param syllables   syllables of the word
 * @param ci          hanzi of the word, ci_length elements
 * @param ci_length   number of hanzi (and syllables) in the word
 * @return 1 if the word can be handled, 0 otherwise; always 0 when
 *         ci_length is less than 2
 */
int HaveItem(SYLLABLE *syllables, HZ *ci, int ci_length)
{
	int i, count;
	CANDIDATE candidate[MAX_CANDIDATES];

	if (ci_length < 2)
		return 0;

	count = ProcessCiCandidate(syllables, ci_length, 0, candidate, MAX_CANDIDATES, 0);
	if (count)		/* word candidates exist */
	{
		for (i = 0; i < count; i++)
		{
			/*
			 * Compare the hanzi as raw bytes. memcmp is used instead of
			 * strncmp because strncmp stops at the first zero byte and
			 * would then report a match without comparing the rest.
			 */
			if (ci_length == candidate[i].word.item->ci_length &&
				!memcmp(ci, candidate[i].word.hz, sizeof(HZ) * ci_length))
				return 1;	/* found the word */
		}

		/* Other candidates exist, so ICW does not need to be consulted. */
		return 0;
	}

	count = GetIcwCandidates(syllables, ci_length, candidate);
	if (!count)
		return 0;

	/* Only the first ICW candidate is considered. */
	if (!memcmp(ci, candidate[0].icw.hz, sizeof(HZ) * ci_length))
		return 1;

	return 0;
}
/*
 * Full processing from a syllable to hanzi candidates.
 *
 * Steps:
 *   1. Retrieve hanzi candidates.
 *   2. Remove duplicate hanzi candidates.
 *   3. Sort the candidates.
 *
 * Retrieval follows the current configuration in pim_config: fuzzy mode
 * (use_fuzzy / fuzzy_mode) and hanzi output mode (hz_output_mode).
 *
 * Unless the pinyin mode is shuangpin, the syllable is also tried as two
 * smaller syllables (e.g. typing "xian" for the two-hanzi word 西安). Words
 * found for that split are placed at the front of candidate_array as
 * CAND_TYPE_ZI entries with hz.is_word set, followed by the hanzi found for
 * the first small syllable and then the hanzi found for the whole syllable.
 *
 * Parameters:
 *   syllable          the syllable
 *   candidate_array   output candidate array
 *   array_length      capacity of candidate_array
 *   zi_level          hanzi level passed to the first retrieval attempt; when
 *                     that attempt finds nothing, retrieval is retried with
 *                     HZ_ALL_USED / HZ_OUTPUT_HANZI_ALL
 * Return value:
 *   number of candidates stored in candidate_array
 */
int ProcessZiCandidates(SYLLABLE syllable, CANDIDATE *candidate_array, int array_length, int zi_level)
{
	int zi_count = 0, normal_zi_count = 0;
	int small_count = 0, small_ci_count = 0, small_zi_count = 0;
	SYLLABLE small_syllables[2];
	CANDIDATE small_word_candidates[0x10];

	/* Look for candidates such as "xian" -> 西安; not needed for shuangpin. */
	if (/*pim_config->special_parse_pin_yin && */pim_config->pinyin_mode != PINYIN_SHUANGPIN &&
		GetSmallSyllables(syllable, small_syllables))
	{
		int i;

		small_ci_count = ProcessCiCandidate(small_syllables, 2, 0, small_word_candidates,
											_SizeOf(small_word_candidates), 1);

		/*
		 * Never keep more words than the output array can hold. Without this
		 * clamp the offsets and remaining lengths computed below would run
		 * past the end of candidate_array / become negative.
		 */
		if (small_ci_count > array_length)
			small_ci_count = array_length;

		for (i = 0; i < small_ci_count; i++)
		{
			candidate_array[i].type               = CAND_TYPE_ZI;
			candidate_array[i].hz.is_word         = 1;
			candidate_array[i].hz.word_item       = small_word_candidates[i].word.item;
			candidate_array[i].hz.origin_syllable = syllable;
			candidate_array[i].hz.top_pos         = DEFAULT_TOP_POS;
			candidate_array[i].hz.cache_pos       =
				GetCiCacheInfo(GetItemHZPtr(candidate_array[i].hz.word_item),
							   candidate_array[i].hz.word_item->ci_length);
		}

		/* Retrieve hanzi for the first of the two small syllables. */
		small_zi_count = GetZiCandidates(small_syllables[0],
										 candidate_array + small_ci_count,
										 array_length - small_ci_count,
										 pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
										 zi_level,
										 pim_config->hz_output_mode);

		/* Nothing found (e.g. "eng"): widen the hanzi range and search again. */
		if (!small_zi_count)
			small_zi_count = GetZiCandidates(small_syllables[0],
											 candidate_array + small_ci_count,
											 array_length - small_ci_count,
											 pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
											 HZ_ALL_USED,
											 HZ_OUTPUT_HANZI_ALL);

		/* Mark the hanzi that came from the small-syllable split. */
		for (i = small_ci_count; i < small_ci_count + small_zi_count; i++)
			candidate_array[i].hz.hz_type = ZI_TYPE_OTHER;

		small_count = small_ci_count + small_zi_count;
	}

	/* Retrieve ordinary hanzi for the whole syllable. */
	normal_zi_count = GetZiCandidates(syllable,
									  candidate_array + small_count,
									  array_length - small_count,
									  pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
									  zi_level,
									  pim_config->hz_output_mode);

	/* Nothing found (e.g. "eng"): widen the hanzi range and search again. */
	if (!normal_zi_count)
		normal_zi_count = GetZiCandidates(syllable,
										  candidate_array + small_count,
										  array_length - small_count,
										  pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
										  HZ_ALL_USED,
										  HZ_OUTPUT_HANZI_ALL);

	zi_count = small_zi_count + normal_zi_count;

	/* Drop hanzi that cannot be displayed. */
	zi_count = DeleteUnreadableZiCandidates(candidate_array + small_ci_count, zi_count);

	/* Remove duplicates (hanzi only; the small-syllable words are skipped). */
	zi_count = UnifyZiCandidates(candidate_array + small_ci_count, zi_count);

	/* Sort everything: both the hanzi and the small-syllable words. */
	SortZiCandidates(candidate_array, small_ci_count + zi_count);

	return small_ci_count + zi_count;
}